diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -229,6 +229,7 @@ llvm::SmallDenseMap MappedDeclsFields; bool AllEscaped = false; bool IsForCombinedParallelRegion = false; + bool IsInSPMDKernel = false; void markAsEscaped(const ValueDecl *VD) { // Do not globalize declare target variables. @@ -242,6 +243,9 @@ // Variables captured by value must be globalized. if (auto *CSI = CGF.CapturedStmtInfo) { if (const FieldDecl *FD = CSI->lookup(cast(VD))) { + // Do not globalize captured vars in SPMD mode. + if (IsInSPMDKernel) + return; // Check if need to capture the variable that was already captured by // value in the outer region. if (!IsForCombinedParallelRegion) { @@ -351,9 +355,10 @@ public: CheckVarsEscapingDeclContext(CodeGenFunction &CGF, - ArrayRef TeamsReductions) - : CGF(CGF), EscapedDecls(TeamsReductions.begin(), TeamsReductions.end()) { - } + ArrayRef TeamsReductions, + bool IsInSPMDKernel = false) + : CGF(CGF), EscapedDecls(TeamsReductions.begin(), TeamsReductions.end()), + IsInSPMDKernel(IsInSPMDKernel) {} virtual ~CheckVarsEscapingDeclContext() = default; void VisitDeclStmt(const DeclStmt *S) { if (!S) @@ -1639,65 +1644,30 @@ OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { SourceLocation Loc = D.getBeginLoc(); - const RecordDecl *GlobalizedRD = nullptr; - llvm::SmallVector LastPrivatesReductions; - llvm::SmallDenseMap MappedDeclsFields; - unsigned WarpSize = CGM.getTarget().getGridValue(llvm::omp::GV_Warp_Size); - // Globalize team reductions variable unconditionally in all modes. - if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD) - getTeamsReductionVars(CGM.getContext(), D, LastPrivatesReductions); - if (getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD) { - getDistributeLastprivateVars(CGM.getContext(), D, LastPrivatesReductions); - if (!LastPrivatesReductions.empty()) { - GlobalizedRD = ::buildRecordForGlobalizedVars( - CGM.getContext(), llvm::None, LastPrivatesReductions, - MappedDeclsFields, WarpSize); - } - } else if (!LastPrivatesReductions.empty()) { - assert(!TeamAndReductions.first && - "Previous team declaration is not expected."); - TeamAndReductions.first = D.getCapturedStmt(OMPD_teams)->getCapturedDecl(); - std::swap(TeamAndReductions.second, LastPrivatesReductions); - } + assert(!TeamAndReductions.first && + "Previous team declaration is not expected."); + if (getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD) + getDistributeLastprivateVars(CGM.getContext(), D, TeamAndReductions.second); + else + getTeamsReductionVars(CGM.getContext(), D, TeamAndReductions.second); + TeamAndReductions.first = D.getCapturedStmt(OMPD_teams)->getCapturedDecl(); // Emit target region as a standalone region. class NVPTXPrePostActionTy : public PrePostActionTy { SourceLocation &Loc; - const RecordDecl *GlobalizedRD; - llvm::SmallDenseMap - &MappedDeclsFields; public: - NVPTXPrePostActionTy( - SourceLocation &Loc, const RecordDecl *GlobalizedRD, - llvm::SmallDenseMap - &MappedDeclsFields) - : Loc(Loc), GlobalizedRD(GlobalizedRD), - MappedDeclsFields(MappedDeclsFields) {} + NVPTXPrePostActionTy(SourceLocation &Loc) : Loc(Loc) {} void Enter(CodeGenFunction &CGF) override { auto &Rt = static_cast(CGF.CGM.getOpenMPRuntime()); - if (GlobalizedRD) { - auto I = Rt.FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first; - I->getSecond().GlobalRecord = GlobalizedRD; - I->getSecond().MappedParams = - std::make_unique(); - DeclToAddrMapTy &Data = I->getSecond().LocalVarData; - for (const auto &Pair : MappedDeclsFields) { - assert(Pair.getFirst()->isCanonicalDecl() && - "Expected canonical declaration"); - Data.insert(std::make_pair(Pair.getFirst(), - MappedVarData(Pair.getSecond(), - /*IsOnePerTeam=*/true))); - } - } Rt.emitGenericVarsProlog(CGF, Loc); } void Exit(CodeGenFunction &CGF) override { static_cast(CGF.CGM.getOpenMPRuntime()) .emitGenericVarsEpilog(CGF); } - } Action(Loc, GlobalizedRD, MappedDeclsFields); + } Action(Loc); CodeGen.setAction(Action); llvm::Function *OutlinedFun = CGOpenMPRuntime::emitTeamsOutlinedFunction( D, ThreadIDVar, InnermostKind, CodeGen); @@ -4147,7 +4117,8 @@ void CGOpenMPRuntimeGPU::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { - if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic) + if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic && + getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD) return; assert(D && "Expected function or captured|block decl."); @@ -4164,13 +4135,16 @@ } else if (const auto *CD = dyn_cast(D)) { Body = CD->getBody(); NeedToDelayGlobalization = CGF.CapturedStmtInfo->getKind() == CR_OpenMP; - if (NeedToDelayGlobalization && + if (NeedToDelayGlobalization && !IsInTTDRegion && getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD) return; } if (!Body) return; - CheckVarsEscapingDeclContext VarChecker(CGF, TeamAndReductions.second); + bool IsInSPMDKernel = NeedToDelayGlobalization && + getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD; + CheckVarsEscapingDeclContext VarChecker(CGF, TeamAndReductions.second, + IsInSPMDKernel); VarChecker.Visit(Body); const RecordDecl *GlobalizedVarsRecord = VarChecker.getGlobalizedRecord(IsInTTDRegion); @@ -4195,6 +4169,8 @@ const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD); Data.insert(std::make_pair(VD, MappedVarData(FD, IsInTTDRegion))); } + if (IsInSPMDKernel) + return; if (!IsInTTDRegion && !NeedToDelayGlobalization && !IsInParallelRegion) { CheckVarsEscapingDeclContext VarChecker(CGF, llvm::None); VarChecker.Visit(Body); diff --git a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp --- a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp +++ b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp @@ -29,9 +29,12 @@ int main(int argc, char **argv) { int b[10], c[10], d[10]; #pragma omp target teams map(tofrom:a) + { + double escaped = 0; #pragma omp distribute parallel for firstprivate(b) lastprivate(c) if(a) for (int i= 0; i < argc; ++i) - a = foo(&i) + foo(&a) + foo(&b[i]) + foo(&c[i]) + foo(&d[i]); + a = foo(&i) + foo(&a) + foo(&b[i]) + foo(&c[i]) + foo(&d[i]) + escaped; + } return 0; } @@ -98,7 +101,7 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4 // CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x i8*], align 8 +// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [8 x i8*], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 @@ -117,7 +120,9 @@ // CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 8 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i64 0 // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct._globalized_locals_ty* -// CHECK1-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP8]], i32 0, i32 0 +// CHECK1-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP8]], i32 0, i32 1 +// CHECK1-NEXT: [[ESCAPED:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], %struct._globalized_locals_ty* [[TMP8]], i32 0, i32 0 +// CHECK1-NEXT: store double 0.000000e+00, double* [[ESCAPED]], align 8 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 // CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -169,88 +174,91 @@ // CHECK1-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 // CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP27:%.*]] = zext i32 [[TMP26]] to i64 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: [[TMP29:%.*]] = inttoptr i64 [[TMP25]] to i8* // CHECK1-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 // CHECK1-NEXT: [[TMP31:%.*]] = inttoptr i64 [[TMP27]] to i8* // CHECK1-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 // CHECK1-NEXT: [[TMP33:%.*]] = bitcast i32* [[CONV]] to i8* // CHECK1-NEXT: store i8* [[TMP33]], i8** [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 // CHECK1-NEXT: [[TMP35:%.*]] = bitcast i32* [[TMP2]] to i8* // CHECK1-NEXT: store i8* [[TMP35]], i8** [[TMP34]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 // CHECK1-NEXT: [[TMP37:%.*]] = bitcast [10 x i32]* [[B4]] to i8* // CHECK1-NEXT: store i8* [[TMP37]], i8** [[TMP36]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 5 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 5 // CHECK1-NEXT: [[TMP39:%.*]] = bitcast [10 x i32]* [[C1]] to i8* // CHECK1-NEXT: store i8* [[TMP39]], i8** [[TMP38]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 6 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 6 // CHECK1-NEXT: [[TMP41:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* // CHECK1-NEXT: store i8* [[TMP41]], i8** [[TMP40]], align 8 -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP42]], 0 -// CHECK1-NEXT: [[TMP43:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK1-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 -// CHECK1-NEXT: [[TMP46:%.*]] = bitcast [7 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP45]], i32 [[TMP43]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP46]], i64 7) +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 7 +// CHECK1-NEXT: [[TMP43:%.*]] = bitcast double* [[ESCAPED]] to i8* +// CHECK1-NEXT: store i8* [[TMP43]], i8** [[TMP42]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK1-NEXT: [[TMP45:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = bitcast [8 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP47]], i32 [[TMP45]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*, double*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP48]], i64 8) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP47]], [[TMP48]] -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP49]], [[TMP50]] -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP49]], [[TMP50]] +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP51]], [[TMP52]] -// CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP51]], [[TMP52]] +// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP53]], [[TMP54]] +// CHECK1-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP53]], [[TMP54]] +// CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP56:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP55]], [[TMP56]] // CHECK1-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] // CHECK1: cond.true12: -// CHECK1-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: br label [[COND_END14:%.*]] // CHECK1: cond.false13: -// CHECK1-NEXT: [[TMP56:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP58:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END14]] // CHECK1: cond.end14: -// CHECK1-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP55]], [[COND_TRUE12]] ], [ [[TMP56]], [[COND_FALSE13]] ] +// CHECK1-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP57]], [[COND_TRUE12]] ], [ [[TMP58]], [[COND_FALSE13]] ] // CHECK1-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP57:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP57]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP59:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP59]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP58:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = load i32, i32* [[TMP58]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP59]]) -// CHECK1-NEXT: [[TMP60:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0 -// CHECK1-NEXT: br i1 [[TMP61]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP60:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = load i32, i32* [[TMP60]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP61]]) +// CHECK1-NEXT: [[TMP62:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0 +// CHECK1-NEXT: br i1 [[TMP63]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP62:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* -// CHECK1-NEXT: [[TMP63:%.*]] = bitcast [10 x i32]* [[C1]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP62]], i8* align 4 [[TMP63]], i64 40, i1 false) +// CHECK1-NEXT: [[TMP64:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* +// CHECK1-NEXT: [[TMP65:%.*]] = bitcast [10 x i32]* [[C1]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP64]], i8* align 8 [[TMP65]], i64 40, i1 false) // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: -// CHECK1-NEXT: [[TMP64:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 -// CHECK1-NEXT: call void @__kmpc_restore_team_static_memory(i16 1, i16 [[TMP64]]) +// CHECK1-NEXT: [[TMP66:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 +// CHECK1-NEXT: call void @__kmpc_restore_team_static_memory(i16 1, i16 [[TMP66]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]], double* nonnull align 8 dereferenceable(8) [[ESCAPED:%.*]]) #[[ATTR0]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -261,6 +269,7 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK1-NEXT: [[ESCAPED_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -282,95 +291,101 @@ // CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 // CHECK1-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 8 // CHECK1-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 8 +// CHECK1-NEXT: store double* [[ESCAPED]], double** [[ESCAPED_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load double*, double** [[ESCAPED_ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP11]] to i32 // CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[B4]] to i8* -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 40, i1 false) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[B4]] to i8* +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 40, i1 false) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP15]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp ule i64 [[CONV7]], [[TMP17]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp ule i64 [[CONV7]], [[TMP18]] // CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 // CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I6]]) #[[ATTR5:[0-9]+]] // CHECK1-NEXT: [[CALL9:%.*]] = call i32 @_Z3fooPi(i32* [[TMP1]]) #[[ATTR5]] // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[CALL]], [[CALL9]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B4]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: [[CALL11:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX]]) #[[ATTR5]] // CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[ADD10]], [[CALL11]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK1-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK1-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP21]] to i64 // CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C5]], i64 0, i64 [[IDXPROM13]] // CHECK1-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX14]]) #[[ATTR5]] // CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD12]], [[CALL15]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK1-NEXT: [[IDXPROM17:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK1-NEXT: [[IDXPROM17:%.*]] = sext i32 [[TMP22]] to i64 // CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i64 0, i64 [[IDXPROM17]] // CHECK1-NEXT: [[CALL19:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX18]]) #[[ATTR5]] // CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[ADD16]], [[CALL19]] -// CHECK1-NEXT: store i32 [[ADD20]], i32* [[TMP1]], align 4 +// CHECK1-NEXT: [[CONV21:%.*]] = sitofp i32 [[ADD20]] to double +// CHECK1-NEXT: [[TMP23:%.*]] = load double, double* [[TMP5]], align 8 +// CHECK1-NEXT: [[ADD22:%.*]] = fadd double [[CONV21]], [[TMP23]] +// CHECK1-NEXT: [[CONV23:%.*]] = fptosi double [[ADD22]] to i32 +// CHECK1-NEXT: store i32 [[CONV23]], i32* [[TMP1]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK1-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK1-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK1-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP28:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* -// CHECK1-NEXT: [[TMP29:%.*]] = bitcast [10 x i32]* [[C5]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i64 40, i1 false) +// CHECK1-NEXT: [[TMP30:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* +// CHECK1-NEXT: [[TMP31:%.*]] = bitcast [10 x i32]* [[C5]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i64 40, i1 false) // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -440,7 +455,7 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4 // CHECK2-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x i8*], align 8 +// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [8 x i8*], align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 @@ -453,9 +468,11 @@ // CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* // CHECK2-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i64 40, i16 1) +// CHECK2-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i64 48, i16 1) // CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct._globalized_locals_ty* -// CHECK2-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP5]], i32 0, i32 0 +// CHECK2-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP5]], i32 0, i32 1 +// CHECK2-NEXT: [[ESCAPED:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], %struct._globalized_locals_ty* [[TMP5]], i32 0, i32 0 +// CHECK2-NEXT: store double 0.000000e+00, double* [[ESCAPED]], align 8 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 8 // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -507,77 +524,80 @@ // CHECK2-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 // CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 -// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK2-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP22]] to i8* // CHECK2-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 -// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 // CHECK2-NEXT: [[TMP28:%.*]] = inttoptr i64 [[TMP24]] to i8* // CHECK2-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 -// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 // CHECK2-NEXT: [[TMP30:%.*]] = bitcast i32* [[CONV]] to i8* // CHECK2-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 8 -// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 +// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 // CHECK2-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP2]] to i8* // CHECK2-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 8 -// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 +// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 // CHECK2-NEXT: [[TMP34:%.*]] = bitcast [10 x i32]* [[B4]] to i8* // CHECK2-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 8 -// CHECK2-NEXT: [[TMP35:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 5 +// CHECK2-NEXT: [[TMP35:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 5 // CHECK2-NEXT: [[TMP36:%.*]] = bitcast [10 x i32]* [[C1]] to i8* // CHECK2-NEXT: store i8* [[TMP36]], i8** [[TMP35]], align 8 -// CHECK2-NEXT: [[TMP37:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 6 +// CHECK2-NEXT: [[TMP37:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 6 // CHECK2-NEXT: [[TMP38:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* // CHECK2-NEXT: store i8* [[TMP38]], i8** [[TMP37]], align 8 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP39]], 0 -// CHECK2-NEXT: [[TMP40:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK2-NEXT: [[TMP41:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[TMP41]], align 4 -// CHECK2-NEXT: [[TMP43:%.*]] = bitcast [7 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP42]], i32 [[TMP40]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP43]], i64 7) +// CHECK2-NEXT: [[TMP39:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 7 +// CHECK2-NEXT: [[TMP40:%.*]] = bitcast double* [[ESCAPED]] to i8* +// CHECK2-NEXT: store i8* [[TMP40]], i8** [[TMP39]], align 8 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK2-NEXT: [[TMP42:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = bitcast [8 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP44]], i32 [[TMP42]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*, double*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP45]], i64 8) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP46]], [[TMP47]] -// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP46]], [[TMP47]] +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP48]], [[TMP49]] -// CHECK2-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP48]], [[TMP49]] +// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP50]], [[TMP51]] +// CHECK2-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP50]], [[TMP51]] +// CHECK2-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP52]], [[TMP53]] // CHECK2-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] // CHECK2: cond.true12: -// CHECK2-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: br label [[COND_END14:%.*]] // CHECK2: cond.false13: -// CHECK2-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END14]] // CHECK2: cond.end14: -// CHECK2-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP52]], [[COND_TRUE12]] ], [ [[TMP53]], [[COND_FALSE13]] ] +// CHECK2-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP54]], [[COND_TRUE12]] ], [ [[TMP55]], [[COND_FALSE13]] ] // CHECK2-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP54]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP56:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP56]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP55:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP56]]) -// CHECK2-NEXT: [[TMP57:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP58:%.*]] = icmp ne i32 [[TMP57]], 0 -// CHECK2-NEXT: br i1 [[TMP58]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK2-NEXT: [[TMP57:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP58:%.*]] = load i32, i32* [[TMP57]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP58]]) +// CHECK2-NEXT: [[TMP59:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0 +// CHECK2-NEXT: br i1 [[TMP60]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK2: .omp.lastprivate.then: -// CHECK2-NEXT: [[TMP59:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* -// CHECK2-NEXT: [[TMP60:%.*]] = bitcast [10 x i32]* [[C1]] to i8* -// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP59]], i8* align 4 [[TMP60]], i64 40, i1 false) +// CHECK2-NEXT: [[TMP61:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* +// CHECK2-NEXT: [[TMP62:%.*]] = bitcast [10 x i32]* [[C1]] to i8* +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP61]], i8* align 8 [[TMP62]], i64 40, i1 false) // CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK2: .omp.lastprivate.done: // CHECK2-NEXT: br label [[OMP_PRECOND_END]] @@ -587,7 +607,7 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]], double* nonnull align 8 dereferenceable(8) [[ESCAPED:%.*]]) #[[ATTR0]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -598,6 +618,7 @@ // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 8 // CHECK2-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK2-NEXT: [[ESCAPED_ADDR:%.*]] = alloca double*, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -619,95 +640,101 @@ // CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 // CHECK2-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 8 // CHECK2-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 8 +// CHECK2-NEXT: store double* [[ESCAPED]], double** [[ESCAPED_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 // CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 // CHECK2-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 8 // CHECK2-NEXT: [[TMP4:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK2-NEXT: [[TMP5:%.*]] = load double*, double** [[ESCAPED_ADDR]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 -// CHECK2-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK2-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP11]] to i32 // CHECK2-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[B4]] to i8* -// CHECK2-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* -// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 40, i1 false) -// CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[B4]] to i8* +// CHECK2-NEXT: [[TMP13:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 40, i1 false) +// CHECK2-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP15]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[CONV7:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP8:%.*]] = icmp ule i64 [[CONV7]], [[TMP17]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CONV7:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK2-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CMP8:%.*]] = icmp ule i64 [[CONV7]], [[TMP18]] // CHECK2-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 // CHECK2-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I6]]) #[[ATTR5:[0-9]+]] // CHECK2-NEXT: [[CALL9:%.*]] = call i32 @_Z3fooPi(i32* [[TMP1]]) #[[ATTR5]] // CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[CALL]], [[CALL9]] -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B4]], i64 0, i64 [[IDXPROM]] // CHECK2-NEXT: [[CALL11:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX]]) #[[ATTR5]] // CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[ADD10]], [[CALL11]] -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK2-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK2-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP21]] to i64 // CHECK2-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C5]], i64 0, i64 [[IDXPROM13]] // CHECK2-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX14]]) #[[ATTR5]] // CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD12]], [[CALL15]] -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK2-NEXT: [[IDXPROM17:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK2-NEXT: [[IDXPROM17:%.*]] = sext i32 [[TMP22]] to i64 // CHECK2-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i64 0, i64 [[IDXPROM17]] // CHECK2-NEXT: [[CALL19:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX18]]) #[[ATTR5]] // CHECK2-NEXT: [[ADD20:%.*]] = add nsw i32 [[ADD16]], [[CALL19]] -// CHECK2-NEXT: store i32 [[ADD20]], i32* [[TMP1]], align 4 +// CHECK2-NEXT: [[CONV21:%.*]] = sitofp i32 [[ADD20]] to double +// CHECK2-NEXT: [[TMP23:%.*]] = load double, double* [[TMP5]], align 8 +// CHECK2-NEXT: [[ADD22:%.*]] = fadd double [[CONV21]], [[TMP23]] +// CHECK2-NEXT: [[CONV23:%.*]] = fptosi double [[ADD22]] to i32 +// CHECK2-NEXT: store i32 [[CONV23]], i32* [[TMP1]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK2-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK2-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK2-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK2-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK2-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK2: .omp.lastprivate.then: -// CHECK2-NEXT: [[TMP28:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* -// CHECK2-NEXT: [[TMP29:%.*]] = bitcast [10 x i32]* [[C5]] to i8* -// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i64 40, i1 false) +// CHECK2-NEXT: [[TMP30:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* +// CHECK2-NEXT: [[TMP31:%.*]] = bitcast [10 x i32]* [[C5]] to i8* +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i64 40, i1 false) // CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK2: .omp.lastprivate.done: // CHECK2-NEXT: br label [[OMP_PRECOND_END]] @@ -775,7 +802,7 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4 // CHECK3-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x i8*], align 4 +// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [8 x i8*], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 @@ -793,7 +820,9 @@ // CHECK3-NEXT: [[TMP6:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct._globalized_locals_ty* -// CHECK3-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP8]], i32 0, i32 0 +// CHECK3-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP8]], i32 0, i32 1 +// CHECK3-NEXT: [[ESCAPED:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], %struct._globalized_locals_ty* [[TMP8]], i32 0, i32 0 +// CHECK3-NEXT: store double 0.000000e+00, double* [[ESCAPED]], align 8 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -843,88 +872,91 @@ // CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP27:%.*]] = inttoptr i32 [[TMP24]] to i8* // CHECK3-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK3-NEXT: [[TMP29:%.*]] = inttoptr i32 [[TMP25]] to i8* // CHECK3-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK3-NEXT: [[TMP31:%.*]] = bitcast i32* [[ARGC_ADDR]] to i8* // CHECK3-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 // CHECK3-NEXT: [[TMP33:%.*]] = bitcast i32* [[TMP2]] to i8* // CHECK3-NEXT: store i8* [[TMP33]], i8** [[TMP32]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 // CHECK3-NEXT: [[TMP35:%.*]] = bitcast [10 x i32]* [[B4]] to i8* // CHECK3-NEXT: store i8* [[TMP35]], i8** [[TMP34]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 5 // CHECK3-NEXT: [[TMP37:%.*]] = bitcast [10 x i32]* [[C1]] to i8* // CHECK3-NEXT: store i8* [[TMP37]], i8** [[TMP36]], align 4 -// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 6 // CHECK3-NEXT: [[TMP39:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* // CHECK3-NEXT: store i8* [[TMP39]], i8** [[TMP38]], align 4 -// CHECK3-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP40]], 0 -// CHECK3-NEXT: [[TMP41:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK3-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 -// CHECK3-NEXT: [[TMP44:%.*]] = bitcast [7 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP43]], i32 [[TMP41]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP44]], i32 7) +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 7 +// CHECK3-NEXT: [[TMP41:%.*]] = bitcast double* [[ESCAPED]] to i8* +// CHECK3-NEXT: store i8* [[TMP41]], i8** [[TMP40]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP42]], 0 +// CHECK3-NEXT: [[TMP43:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK3-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 +// CHECK3-NEXT: [[TMP46:%.*]] = bitcast [8 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP45]], i32 [[TMP43]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*, double*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP46]], i32 8) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP47]], [[TMP48]] -// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP47]], [[TMP48]] +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP49]], [[TMP50]] -// CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP49]], [[TMP50]] +// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP51]], [[TMP52]] +// CHECK3-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP51]], [[TMP52]] +// CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP53]], [[TMP54]] // CHECK3-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] // CHECK3: cond.true12: -// CHECK3-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: br label [[COND_END14:%.*]] // CHECK3: cond.false13: -// CHECK3-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP56:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END14]] // CHECK3: cond.end14: -// CHECK3-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP53]], [[COND_TRUE12]] ], [ [[TMP54]], [[COND_FALSE13]] ] +// CHECK3-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP55]], [[COND_TRUE12]] ], [ [[TMP56]], [[COND_FALSE13]] ] // CHECK3-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP55]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP57:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP57]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP56:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP57:%.*]] = load i32, i32* [[TMP56]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP57]]) -// CHECK3-NEXT: [[TMP58:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP59:%.*]] = icmp ne i32 [[TMP58]], 0 -// CHECK3-NEXT: br i1 [[TMP59]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: [[TMP58:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP59:%.*]] = load i32, i32* [[TMP58]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP59]]) +// CHECK3-NEXT: [[TMP60:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0 +// CHECK3-NEXT: br i1 [[TMP61]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP60:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* -// CHECK3-NEXT: [[TMP61:%.*]] = bitcast [10 x i32]* [[C1]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP60]], i8* align 4 [[TMP61]], i32 40, i1 false) +// CHECK3-NEXT: [[TMP62:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* +// CHECK3-NEXT: [[TMP63:%.*]] = bitcast [10 x i32]* [[C1]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP62]], i8* align 8 [[TMP63]], i32 40, i1 false) // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: -// CHECK3-NEXT: [[TMP62:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 -// CHECK3-NEXT: call void @__kmpc_restore_team_static_memory(i16 1, i16 [[TMP62]]) +// CHECK3-NEXT: [[TMP64:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 +// CHECK3-NEXT: call void @__kmpc_restore_team_static_memory(i16 1, i16 [[TMP64]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]], double* nonnull align 8 dereferenceable(8) [[ESCAPED:%.*]]) #[[ATTR0]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 @@ -935,6 +967,7 @@ // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK3-NEXT: [[ESCAPED_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -956,89 +989,95 @@ // CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK3-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 // CHECK3-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 +// CHECK3-NEXT: store double* [[ESCAPED]], double** [[ESCAPED_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load double*, double** [[ESCAPED_ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[B3]] to i8* -// CHECK3-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 40, i1 false) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[B3]] to i8* +// CHECK3-NEXT: [[TMP13:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 40, i1 false) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP15]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp ule i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 // CHECK3-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I5]]) #[[ATTR5:[0-9]+]] // CHECK3-NEXT: [[CALL7:%.*]] = call i32 @_Z3fooPi(i32* [[TMP1]]) #[[ATTR5]] // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]] -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B3]], i32 0, i32 [[TMP19]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B3]], i32 0, i32 [[TMP20]] // CHECK3-NEXT: [[CALL9:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX]]) #[[ATTR5]] // CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]] -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK3-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C4]], i32 0, i32 [[TMP20]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK3-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C4]], i32 0, i32 [[TMP21]] // CHECK3-NEXT: [[CALL12:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX11]]) #[[ATTR5]] // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD10]], [[CALL12]] -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i32 0, i32 [[TMP21]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i32 0, i32 [[TMP22]] // CHECK3-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX14]]) #[[ATTR5]] // CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD13]], [[CALL15]] -// CHECK3-NEXT: store i32 [[ADD16]], i32* [[TMP1]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[ADD16]] to double +// CHECK3-NEXT: [[TMP23:%.*]] = load double, double* [[TMP5]], align 8 +// CHECK3-NEXT: [[ADD17:%.*]] = fadd double [[CONV]], [[TMP23]] +// CHECK3-NEXT: [[CONV18:%.*]] = fptosi double [[ADD17]] to i32 +// CHECK3-NEXT: store i32 [[CONV18]], i32* [[TMP1]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK3-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK3-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK3-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP28:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* -// CHECK3-NEXT: [[TMP29:%.*]] = bitcast [10 x i32]* [[C4]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i32 40, i1 false) +// CHECK3-NEXT: [[TMP30:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* +// CHECK3-NEXT: [[TMP31:%.*]] = bitcast [10 x i32]* [[C4]] to i8* +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i32 40, i1 false) // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: br label [[OMP_PRECOND_END]] @@ -1106,7 +1145,7 @@ // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4 // CHECK4-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x i8*], align 4 +// CHECK4-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [8 x i8*], align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK4-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 @@ -1124,7 +1163,9 @@ // CHECK4-NEXT: [[TMP6:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 4 // CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i32 0 // CHECK4-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct._globalized_locals_ty* -// CHECK4-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP8]], i32 0, i32 0 +// CHECK4-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP8]], i32 0, i32 1 +// CHECK4-NEXT: [[ESCAPED:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], %struct._globalized_locals_ty* [[TMP8]], i32 0, i32 0 +// CHECK4-NEXT: store double 0.000000e+00, double* [[ESCAPED]], align 8 // CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -1174,88 +1215,91 @@ // CHECK4: omp.inner.for.body: // CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP26:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP26:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK4-NEXT: [[TMP27:%.*]] = inttoptr i32 [[TMP24]] to i8* // CHECK4-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 4 -// CHECK4-NEXT: [[TMP28:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP28:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK4-NEXT: [[TMP29:%.*]] = inttoptr i32 [[TMP25]] to i8* // CHECK4-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 4 -// CHECK4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK4-NEXT: [[TMP31:%.*]] = bitcast i32* [[ARGC_ADDR]] to i8* // CHECK4-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 4 -// CHECK4-NEXT: [[TMP32:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP32:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 // CHECK4-NEXT: [[TMP33:%.*]] = bitcast i32* [[TMP2]] to i8* // CHECK4-NEXT: store i8* [[TMP33]], i8** [[TMP32]], align 4 -// CHECK4-NEXT: [[TMP34:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 +// CHECK4-NEXT: [[TMP34:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 // CHECK4-NEXT: [[TMP35:%.*]] = bitcast [10 x i32]* [[B4]] to i8* // CHECK4-NEXT: store i8* [[TMP35]], i8** [[TMP34]], align 4 -// CHECK4-NEXT: [[TMP36:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 5 +// CHECK4-NEXT: [[TMP36:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 5 // CHECK4-NEXT: [[TMP37:%.*]] = bitcast [10 x i32]* [[C1]] to i8* // CHECK4-NEXT: store i8* [[TMP37]], i8** [[TMP36]], align 4 -// CHECK4-NEXT: [[TMP38:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 6 +// CHECK4-NEXT: [[TMP38:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 6 // CHECK4-NEXT: [[TMP39:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* // CHECK4-NEXT: store i8* [[TMP39]], i8** [[TMP38]], align 4 -// CHECK4-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP40]], 0 -// CHECK4-NEXT: [[TMP41:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK4-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 -// CHECK4-NEXT: [[TMP44:%.*]] = bitcast [7 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK4-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP43]], i32 [[TMP41]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP44]], i32 7) +// CHECK4-NEXT: [[TMP40:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 7 +// CHECK4-NEXT: [[TMP41:%.*]] = bitcast double* [[ESCAPED]] to i8* +// CHECK4-NEXT: store i8* [[TMP41]], i8** [[TMP40]], align 4 +// CHECK4-NEXT: [[TMP42:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP42]], 0 +// CHECK4-NEXT: [[TMP43:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK4-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 +// CHECK4-NEXT: [[TMP46:%.*]] = bitcast [8 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK4-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP45]], i32 [[TMP43]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*, double*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP46]], i32 8) // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] -// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP47]], [[TMP48]] -// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP47]], [[TMP48]] +// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK4-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP49]], [[TMP50]] -// CHECK4-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP49]], [[TMP50]] +// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 // CHECK4-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK4-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP51]], [[TMP52]] +// CHECK4-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP51]], [[TMP52]] +// CHECK4-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK4-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP53]], [[TMP54]] // CHECK4-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] // CHECK4: cond.true12: -// CHECK4-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK4-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK4-NEXT: br label [[COND_END14:%.*]] // CHECK4: cond.false13: -// CHECK4-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP56:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: br label [[COND_END14]] // CHECK4: cond.end14: -// CHECK4-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP53]], [[COND_TRUE12]] ], [ [[TMP54]], [[COND_FALSE13]] ] +// CHECK4-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP55]], [[COND_TRUE12]] ], [ [[TMP56]], [[COND_FALSE13]] ] // CHECK4-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP55]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP57:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP57]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[TMP56:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP57:%.*]] = load i32, i32* [[TMP56]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP57]]) -// CHECK4-NEXT: [[TMP58:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP59:%.*]] = icmp ne i32 [[TMP58]], 0 -// CHECK4-NEXT: br i1 [[TMP59]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK4-NEXT: [[TMP58:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP59:%.*]] = load i32, i32* [[TMP58]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP59]]) +// CHECK4-NEXT: [[TMP60:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0 +// CHECK4-NEXT: br i1 [[TMP61]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK4: .omp.lastprivate.then: -// CHECK4-NEXT: [[TMP60:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* -// CHECK4-NEXT: [[TMP61:%.*]] = bitcast [10 x i32]* [[C1]] to i8* -// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP60]], i8* align 4 [[TMP61]], i32 40, i1 false) +// CHECK4-NEXT: [[TMP62:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* +// CHECK4-NEXT: [[TMP63:%.*]] = bitcast [10 x i32]* [[C1]] to i8* +// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP62]], i8* align 8 [[TMP63]], i32 40, i1 false) // CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK4: .omp.lastprivate.done: // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: omp.precond.end: -// CHECK4-NEXT: [[TMP62:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 -// CHECK4-NEXT: call void @__kmpc_restore_team_static_memory(i16 1, i16 [[TMP62]]) +// CHECK4-NEXT: [[TMP64:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 +// CHECK4-NEXT: call void @__kmpc_restore_team_static_memory(i16 1, i16 [[TMP64]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { +// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]], double* nonnull align 8 dereferenceable(8) [[ESCAPED:%.*]]) #[[ATTR0]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 @@ -1266,6 +1310,7 @@ // CHECK4-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK4-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK4-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK4-NEXT: [[ESCAPED_ADDR:%.*]] = alloca double*, align 4 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1287,89 +1332,95 @@ // CHECK4-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK4-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 // CHECK4-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 +// CHECK4-NEXT: store double* [[ESCAPED]], double** [[ESCAPED_ADDR]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4 // CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 // CHECK4-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 // CHECK4-NEXT: [[TMP4:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK4-NEXT: [[TMP5:%.*]] = load double*, double** [[ESCAPED_ADDR]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK4-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK4-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK4: omp.precond.then: // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[B3]] to i8* -// CHECK4-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* -// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 40, i1 false) -// CHECK4-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[B3]] to i8* +// CHECK4-NEXT: [[TMP13:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* +// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 40, i1 false) +// CHECK4-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP15]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK4-NEXT: [[CMP6:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK4-NEXT: [[CMP6:%.*]] = icmp ule i32 [[TMP17]], [[TMP18]] // CHECK4-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 // CHECK4-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I5]]) #[[ATTR5:[0-9]+]] // CHECK4-NEXT: [[CALL7:%.*]] = call i32 @_Z3fooPi(i32* [[TMP1]]) #[[ATTR5]] // CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]] -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B3]], i32 0, i32 [[TMP19]] +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B3]], i32 0, i32 [[TMP20]] // CHECK4-NEXT: [[CALL9:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX]]) #[[ATTR5]] // CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]] -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK4-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C4]], i32 0, i32 [[TMP20]] +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK4-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C4]], i32 0, i32 [[TMP21]] // CHECK4-NEXT: [[CALL12:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX11]]) #[[ATTR5]] // CHECK4-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD10]], [[CALL12]] -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK4-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i32 0, i32 [[TMP21]] +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK4-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i32 0, i32 [[TMP22]] // CHECK4-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX14]]) #[[ATTR5]] // CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD13]], [[CALL15]] -// CHECK4-NEXT: store i32 [[ADD16]], i32* [[TMP1]], align 4 +// CHECK4-NEXT: [[CONV:%.*]] = sitofp i32 [[ADD16]] to double +// CHECK4-NEXT: [[TMP23:%.*]] = load double, double* [[TMP5]], align 8 +// CHECK4-NEXT: [[ADD17:%.*]] = fadd double [[CONV]], [[TMP23]] +// CHECK4-NEXT: [[CONV18:%.*]] = fptosi double [[ADD17]] to i32 +// CHECK4-NEXT: store i32 [[CONV18]], i32* [[TMP1]], align 4 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK4-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK4-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK4-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK4-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) +// CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK4-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK4: .omp.lastprivate.then: -// CHECK4-NEXT: [[TMP28:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* -// CHECK4-NEXT: [[TMP29:%.*]] = bitcast [10 x i32]* [[C4]] to i8* -// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i32 40, i1 false) +// CHECK4-NEXT: [[TMP30:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* +// CHECK4-NEXT: [[TMP31:%.*]] = bitcast [10 x i32]* [[C4]] to i8* +// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i32 40, i1 false) // CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK4: .omp.lastprivate.done: // CHECK4-NEXT: br label [[OMP_PRECOND_END]] @@ -1437,7 +1488,7 @@ // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4 // CHECK5-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x i8*], align 4 +// CHECK5-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [8 x i8*], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK5-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 @@ -1449,9 +1500,11 @@ // CHECK5-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 // CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK5-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 40, i16 1) +// CHECK5-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 48, i16 1) // CHECK5-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct._globalized_locals_ty* -// CHECK5-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP5]], i32 0, i32 0 +// CHECK5-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP5]], i32 0, i32 1 +// CHECK5-NEXT: [[ESCAPED:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], %struct._globalized_locals_ty* [[TMP5]], i32 0, i32 0 +// CHECK5-NEXT: store double 0.000000e+00, double* [[ESCAPED]], align 8 // CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -1501,77 +1554,80 @@ // CHECK5: omp.inner.for.body: // CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP21]] to i8* // CHECK5-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 4 -// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK5-NEXT: [[TMP26:%.*]] = inttoptr i32 [[TMP22]] to i8* // CHECK5-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 4 -// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK5-NEXT: [[TMP28:%.*]] = bitcast i32* [[ARGC_ADDR]] to i8* // CHECK5-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 4 -// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 // CHECK5-NEXT: [[TMP30:%.*]] = bitcast i32* [[TMP2]] to i8* // CHECK5-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 4 -// CHECK5-NEXT: [[TMP31:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP31:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 // CHECK5-NEXT: [[TMP32:%.*]] = bitcast [10 x i32]* [[B4]] to i8* // CHECK5-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 4 -// CHECK5-NEXT: [[TMP33:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP33:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 5 // CHECK5-NEXT: [[TMP34:%.*]] = bitcast [10 x i32]* [[C1]] to i8* // CHECK5-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 4 -// CHECK5-NEXT: [[TMP35:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 6 +// CHECK5-NEXT: [[TMP35:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 6 // CHECK5-NEXT: [[TMP36:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* // CHECK5-NEXT: store i8* [[TMP36]], i8** [[TMP35]], align 4 -// CHECK5-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP37]], 0 -// CHECK5-NEXT: [[TMP38:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK5-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 -// CHECK5-NEXT: [[TMP41:%.*]] = bitcast [7 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK5-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP40]], i32 [[TMP38]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP41]], i32 7) +// CHECK5-NEXT: [[TMP37:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 7 +// CHECK5-NEXT: [[TMP38:%.*]] = bitcast double* [[ESCAPED]] to i8* +// CHECK5-NEXT: store i8* [[TMP38]], i8** [[TMP37]], align 4 +// CHECK5-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK5-NEXT: [[TMP40:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK5-NEXT: [[TMP41:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP42:%.*]] = load i32, i32* [[TMP41]], align 4 +// CHECK5-NEXT: [[TMP43:%.*]] = bitcast [8 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK5-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP42]], i32 [[TMP40]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*, double*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP43]], i32 8) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] -// CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] -// CHECK5-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] +// CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP46]], [[TMP47]] -// CHECK5-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP46]], [[TMP47]] +// CHECK5-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK5-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP48]], [[TMP49]] +// CHECK5-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP48]], [[TMP49]] +// CHECK5-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK5-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP50]], [[TMP51]] // CHECK5-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] // CHECK5: cond.true12: -// CHECK5-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK5-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK5-NEXT: br label [[COND_END14:%.*]] // CHECK5: cond.false13: -// CHECK5-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END14]] // CHECK5: cond.end14: -// CHECK5-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP50]], [[COND_TRUE12]] ], [ [[TMP51]], [[COND_FALSE13]] ] +// CHECK5-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP52]], [[COND_TRUE12]] ], [ [[TMP53]], [[COND_FALSE13]] ] // CHECK5-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP52]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP54]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP54]]) -// CHECK5-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP56:%.*]] = icmp ne i32 [[TMP55]], 0 -// CHECK5-NEXT: br i1 [[TMP56]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP55:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP56]]) +// CHECK5-NEXT: [[TMP57:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP58:%.*]] = icmp ne i32 [[TMP57]], 0 +// CHECK5-NEXT: br i1 [[TMP58]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP57:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* -// CHECK5-NEXT: [[TMP58:%.*]] = bitcast [10 x i32]* [[C1]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP57]], i8* align 4 [[TMP58]], i32 40, i1 false) +// CHECK5-NEXT: [[TMP59:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* +// CHECK5-NEXT: [[TMP60:%.*]] = bitcast [10 x i32]* [[C1]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP59]], i8* align 8 [[TMP60]], i32 40, i1 false) // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: // CHECK5-NEXT: br label [[OMP_PRECOND_END]] @@ -1581,7 +1637,7 @@ // // // CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { +// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]], double* nonnull align 8 dereferenceable(8) [[ESCAPED:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 @@ -1592,6 +1648,7 @@ // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK5-NEXT: [[ESCAPED_ADDR:%.*]] = alloca double*, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1613,89 +1670,95 @@ // CHECK5-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK5-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 // CHECK5-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 +// CHECK5-NEXT: store double* [[ESCAPED]], double** [[ESCAPED_ADDR]], align 4 // CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4 // CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK5-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 // CHECK5-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 // CHECK5-NEXT: [[TMP4:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK5-NEXT: [[TMP5:%.*]] = load double*, double** [[ESCAPED_ADDR]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK5-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK5-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK5-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK5: omp.precond.then: // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[B3]] to i8* -// CHECK5-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 40, i1 false) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[B3]] to i8* +// CHECK5-NEXT: [[TMP13:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 40, i1 false) +// CHECK5-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP15]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: [[CMP6:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK5-NEXT: [[CMP6:%.*]] = icmp ule i32 [[TMP17]], [[TMP18]] // CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 // CHECK5-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I5]]) #[[ATTR5:[0-9]+]] // CHECK5-NEXT: [[CALL7:%.*]] = call i32 @_Z3fooPi(i32* [[TMP1]]) #[[ATTR5]] // CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]] -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B3]], i32 0, i32 [[TMP19]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B3]], i32 0, i32 [[TMP20]] // CHECK5-NEXT: [[CALL9:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX]]) #[[ATTR5]] // CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]] -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK5-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C4]], i32 0, i32 [[TMP20]] +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK5-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C4]], i32 0, i32 [[TMP21]] // CHECK5-NEXT: [[CALL12:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX11]]) #[[ATTR5]] // CHECK5-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD10]], [[CALL12]] -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK5-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i32 0, i32 [[TMP21]] +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK5-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i32 0, i32 [[TMP22]] // CHECK5-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX14]]) #[[ATTR5]] // CHECK5-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD13]], [[CALL15]] -// CHECK5-NEXT: store i32 [[ADD16]], i32* [[TMP1]], align 4 +// CHECK5-NEXT: [[CONV:%.*]] = sitofp i32 [[ADD16]] to double +// CHECK5-NEXT: [[TMP23:%.*]] = load double, double* [[TMP5]], align 8 +// CHECK5-NEXT: [[ADD17:%.*]] = fadd double [[CONV]], [[TMP23]] +// CHECK5-NEXT: [[CONV18:%.*]] = fptosi double [[ADD17]] to i32 +// CHECK5-NEXT: store i32 [[CONV18]], i32* [[TMP1]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK5-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK5-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) -// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK5-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK5-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP28:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* -// CHECK5-NEXT: [[TMP29:%.*]] = bitcast [10 x i32]* [[C4]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i32 40, i1 false) +// CHECK5-NEXT: [[TMP30:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* +// CHECK5-NEXT: [[TMP31:%.*]] = bitcast [10 x i32]* [[C4]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i32 40, i1 false) // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: // CHECK5-NEXT: br label [[OMP_PRECOND_END]] @@ -1763,7 +1826,7 @@ // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4 // CHECK6-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x i8*], align 4 +// CHECK6-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [8 x i8*], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK6-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 @@ -1775,9 +1838,11 @@ // CHECK6-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 // CHECK6-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK6-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 40, i16 1) +// CHECK6-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 48, i16 1) // CHECK6-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct._globalized_locals_ty* -// CHECK6-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP5]], i32 0, i32 0 +// CHECK6-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP5]], i32 0, i32 1 +// CHECK6-NEXT: [[ESCAPED:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], %struct._globalized_locals_ty* [[TMP5]], i32 0, i32 0 +// CHECK6-NEXT: store double 0.000000e+00, double* [[ESCAPED]], align 8 // CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK6-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -1827,77 +1892,80 @@ // CHECK6: omp.inner.for.body: // CHECK6-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK6-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP23:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK6-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP21]] to i8* // CHECK6-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 4 -// CHECK6-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP25:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK6-NEXT: [[TMP26:%.*]] = inttoptr i32 [[TMP22]] to i8* // CHECK6-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 4 -// CHECK6-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP27:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK6-NEXT: [[TMP28:%.*]] = bitcast i32* [[ARGC_ADDR]] to i8* // CHECK6-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 4 -// CHECK6-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK6-NEXT: [[TMP29:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 // CHECK6-NEXT: [[TMP30:%.*]] = bitcast i32* [[TMP2]] to i8* // CHECK6-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 4 -// CHECK6-NEXT: [[TMP31:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 +// CHECK6-NEXT: [[TMP31:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 // CHECK6-NEXT: [[TMP32:%.*]] = bitcast [10 x i32]* [[B4]] to i8* // CHECK6-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 4 -// CHECK6-NEXT: [[TMP33:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 5 +// CHECK6-NEXT: [[TMP33:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 5 // CHECK6-NEXT: [[TMP34:%.*]] = bitcast [10 x i32]* [[C1]] to i8* // CHECK6-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 4 -// CHECK6-NEXT: [[TMP35:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 6 +// CHECK6-NEXT: [[TMP35:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 6 // CHECK6-NEXT: [[TMP36:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* // CHECK6-NEXT: store i8* [[TMP36]], i8** [[TMP35]], align 4 -// CHECK6-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP37]], 0 -// CHECK6-NEXT: [[TMP38:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK6-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 -// CHECK6-NEXT: [[TMP41:%.*]] = bitcast [7 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK6-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP40]], i32 [[TMP38]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP41]], i32 7) +// CHECK6-NEXT: [[TMP37:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 7 +// CHECK6-NEXT: [[TMP38:%.*]] = bitcast double* [[ESCAPED]] to i8* +// CHECK6-NEXT: store i8* [[TMP38]], i8** [[TMP37]], align 4 +// CHECK6-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK6-NEXT: [[TMP40:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK6-NEXT: [[TMP41:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP42:%.*]] = load i32, i32* [[TMP41]], align 4 +// CHECK6-NEXT: [[TMP43:%.*]] = bitcast [8 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK6-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP42]], i32 [[TMP40]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*, double*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP43]], i32 8) // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] -// CHECK6-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] -// CHECK6-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK6-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] +// CHECK6-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK6-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP46]], [[TMP47]] -// CHECK6-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP46]], [[TMP47]] +// CHECK6-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 // CHECK6-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK6-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP48]], [[TMP49]] +// CHECK6-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP48]], [[TMP49]] +// CHECK6-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK6-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP50]], [[TMP51]] // CHECK6-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] // CHECK6: cond.true12: -// CHECK6-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK6-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK6-NEXT: br label [[COND_END14:%.*]] // CHECK6: cond.false13: -// CHECK6-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: br label [[COND_END14]] // CHECK6: cond.end14: -// CHECK6-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP50]], [[COND_TRUE12]] ], [ [[TMP51]], [[COND_FALSE13]] ] +// CHECK6-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP52]], [[COND_TRUE12]] ], [ [[TMP53]], [[COND_FALSE13]] ] // CHECK6-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP52]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP54]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: -// CHECK6-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP54]]) -// CHECK6-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP56:%.*]] = icmp ne i32 [[TMP55]], 0 -// CHECK6-NEXT: br i1 [[TMP56]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK6-NEXT: [[TMP55:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP56]]) +// CHECK6-NEXT: [[TMP57:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP58:%.*]] = icmp ne i32 [[TMP57]], 0 +// CHECK6-NEXT: br i1 [[TMP58]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK6: .omp.lastprivate.then: -// CHECK6-NEXT: [[TMP57:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* -// CHECK6-NEXT: [[TMP58:%.*]] = bitcast [10 x i32]* [[C1]] to i8* -// CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP57]], i8* align 4 [[TMP58]], i32 40, i1 false) +// CHECK6-NEXT: [[TMP59:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* +// CHECK6-NEXT: [[TMP60:%.*]] = bitcast [10 x i32]* [[C1]] to i8* +// CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP59]], i8* align 8 [[TMP60]], i32 40, i1 false) // CHECK6-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK6: .omp.lastprivate.done: // CHECK6-NEXT: br label [[OMP_PRECOND_END]] @@ -1907,7 +1975,7 @@ // // // CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { +// CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]], double* nonnull align 8 dereferenceable(8) [[ESCAPED:%.*]]) #[[ATTR0]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 @@ -1918,6 +1986,7 @@ // CHECK6-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK6-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK6-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK6-NEXT: [[ESCAPED_ADDR:%.*]] = alloca double*, align 4 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1939,89 +2008,95 @@ // CHECK6-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK6-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 // CHECK6-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 +// CHECK6-NEXT: store double* [[ESCAPED]], double** [[ESCAPED_ADDR]], align 4 // CHECK6-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4 // CHECK6-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK6-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 // CHECK6-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 // CHECK6-NEXT: [[TMP4:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK6-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK6-NEXT: [[TMP5:%.*]] = load double*, double** [[ESCAPED_ADDR]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK6-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK6-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK6-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK6-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK6-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK6-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK6: omp.precond.then: // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK6-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK6-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK6-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[B3]] to i8* -// CHECK6-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* -// CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 40, i1 false) -// CHECK6-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[B3]] to i8* +// CHECK6-NEXT: [[TMP13:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* +// CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 40, i1 false) +// CHECK6-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP15]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: [[CMP6:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK6-NEXT: [[CMP6:%.*]] = icmp ule i32 [[TMP17]], [[TMP18]] // CHECK6-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 // CHECK6-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I5]]) #[[ATTR5:[0-9]+]] // CHECK6-NEXT: [[CALL7:%.*]] = call i32 @_Z3fooPi(i32* [[TMP1]]) #[[ATTR5]] // CHECK6-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]] -// CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B3]], i32 0, i32 [[TMP19]] +// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B3]], i32 0, i32 [[TMP20]] // CHECK6-NEXT: [[CALL9:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX]]) #[[ATTR5]] // CHECK6-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]] -// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK6-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C4]], i32 0, i32 [[TMP20]] +// CHECK6-NEXT: [[TMP21:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK6-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C4]], i32 0, i32 [[TMP21]] // CHECK6-NEXT: [[CALL12:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX11]]) #[[ATTR5]] // CHECK6-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD10]], [[CALL12]] -// CHECK6-NEXT: [[TMP21:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK6-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i32 0, i32 [[TMP21]] +// CHECK6-NEXT: [[TMP22:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK6-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i32 0, i32 [[TMP22]] // CHECK6-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX14]]) #[[ATTR5]] // CHECK6-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD13]], [[CALL15]] -// CHECK6-NEXT: store i32 [[ADD16]], i32* [[TMP1]], align 4 +// CHECK6-NEXT: [[CONV:%.*]] = sitofp i32 [[ADD16]] to double +// CHECK6-NEXT: [[TMP23:%.*]] = load double, double* [[TMP5]], align 8 +// CHECK6-NEXT: [[ADD17:%.*]] = fadd double [[CONV]], [[TMP23]] +// CHECK6-NEXT: [[CONV18:%.*]] = fptosi double [[ADD17]] to i32 +// CHECK6-NEXT: store i32 [[CONV18]], i32* [[TMP1]], align 4 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK6-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK6-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: -// CHECK6-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) -// CHECK6-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK6-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK6-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) +// CHECK6-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK6-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK6: .omp.lastprivate.then: -// CHECK6-NEXT: [[TMP28:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* -// CHECK6-NEXT: [[TMP29:%.*]] = bitcast [10 x i32]* [[C4]] to i8* -// CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i32 40, i1 false) +// CHECK6-NEXT: [[TMP30:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* +// CHECK6-NEXT: [[TMP31:%.*]] = bitcast [10 x i32]* [[C4]] to i8* +// CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i32 40, i1 false) // CHECK6-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK6: .omp.lastprivate.done: // CHECK6-NEXT: br label [[OMP_PRECOND_END]] @@ -2091,7 +2166,7 @@ // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4 // CHECK7-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x i8*], align 8 +// CHECK7-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [8 x i8*], align 8 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK7-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 @@ -2110,7 +2185,9 @@ // CHECK7-NEXT: [[TMP6:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 8 // CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i64 0 // CHECK7-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct._globalized_locals_ty* -// CHECK7-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP8]], i32 0, i32 0 +// CHECK7-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP8]], i32 0, i32 1 +// CHECK7-NEXT: [[ESCAPED:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], %struct._globalized_locals_ty* [[TMP8]], i32 0, i32 0 +// CHECK7-NEXT: store double 0.000000e+00, double* [[ESCAPED]], align 8 // CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 // CHECK7-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -2162,88 +2239,91 @@ // CHECK7-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 // CHECK7-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: [[TMP27:%.*]] = zext i32 [[TMP26]] to i64 -// CHECK7-NEXT: [[TMP28:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK7-NEXT: [[TMP28:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK7-NEXT: [[TMP29:%.*]] = inttoptr i64 [[TMP25]] to i8* // CHECK7-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 8 -// CHECK7-NEXT: [[TMP30:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK7-NEXT: [[TMP30:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 // CHECK7-NEXT: [[TMP31:%.*]] = inttoptr i64 [[TMP27]] to i8* // CHECK7-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 8 -// CHECK7-NEXT: [[TMP32:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK7-NEXT: [[TMP32:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 // CHECK7-NEXT: [[TMP33:%.*]] = bitcast i32* [[CONV]] to i8* // CHECK7-NEXT: store i8* [[TMP33]], i8** [[TMP32]], align 8 -// CHECK7-NEXT: [[TMP34:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 +// CHECK7-NEXT: [[TMP34:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 // CHECK7-NEXT: [[TMP35:%.*]] = bitcast i32* [[TMP2]] to i8* // CHECK7-NEXT: store i8* [[TMP35]], i8** [[TMP34]], align 8 -// CHECK7-NEXT: [[TMP36:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 +// CHECK7-NEXT: [[TMP36:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 // CHECK7-NEXT: [[TMP37:%.*]] = bitcast [10 x i32]* [[B4]] to i8* // CHECK7-NEXT: store i8* [[TMP37]], i8** [[TMP36]], align 8 -// CHECK7-NEXT: [[TMP38:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 5 +// CHECK7-NEXT: [[TMP38:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 5 // CHECK7-NEXT: [[TMP39:%.*]] = bitcast [10 x i32]* [[C1]] to i8* // CHECK7-NEXT: store i8* [[TMP39]], i8** [[TMP38]], align 8 -// CHECK7-NEXT: [[TMP40:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 6 +// CHECK7-NEXT: [[TMP40:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 6 // CHECK7-NEXT: [[TMP41:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* // CHECK7-NEXT: store i8* [[TMP41]], i8** [[TMP40]], align 8 -// CHECK7-NEXT: [[TMP42:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP42]], 0 -// CHECK7-NEXT: [[TMP43:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK7-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK7-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 -// CHECK7-NEXT: [[TMP46:%.*]] = bitcast [7 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK7-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP45]], i32 [[TMP43]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP46]], i64 7) +// CHECK7-NEXT: [[TMP42:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 7 +// CHECK7-NEXT: [[TMP43:%.*]] = bitcast double* [[ESCAPED]] to i8* +// CHECK7-NEXT: store i8* [[TMP43]], i8** [[TMP42]], align 8 +// CHECK7-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK7-NEXT: [[TMP45:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK7-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK7-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 +// CHECK7-NEXT: [[TMP48:%.*]] = bitcast [8 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK7-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP47]], i32 [[TMP45]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*, double*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP48]], i64 8) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP47]], [[TMP48]] -// CHECK7-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP49]], [[TMP50]] -// CHECK7-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP49]], [[TMP50]] +// CHECK7-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP51]], [[TMP52]] -// CHECK7-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP51]], [[TMP52]] +// CHECK7-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK7-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP53]], [[TMP54]] +// CHECK7-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP53]], [[TMP54]] +// CHECK7-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP56:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK7-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP55]], [[TMP56]] // CHECK7-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] // CHECK7: cond.true12: -// CHECK7-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK7-NEXT: [[TMP57:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK7-NEXT: br label [[COND_END14:%.*]] // CHECK7: cond.false13: -// CHECK7-NEXT: [[TMP56:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP58:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END14]] // CHECK7: cond.end14: -// CHECK7-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP55]], [[COND_TRUE12]] ], [ [[TMP56]], [[COND_FALSE13]] ] +// CHECK7-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP57]], [[COND_TRUE12]] ], [ [[TMP58]], [[COND_FALSE13]] ] // CHECK7-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP57:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP57]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP59:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP59]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: [[TMP58:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK7-NEXT: [[TMP59:%.*]] = load i32, i32* [[TMP58]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP59]]) -// CHECK7-NEXT: [[TMP60:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0 -// CHECK7-NEXT: br i1 [[TMP61]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK7-NEXT: [[TMP60:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK7-NEXT: [[TMP61:%.*]] = load i32, i32* [[TMP60]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP61]]) +// CHECK7-NEXT: [[TMP62:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0 +// CHECK7-NEXT: br i1 [[TMP63]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK7: .omp.lastprivate.then: -// CHECK7-NEXT: [[TMP62:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* -// CHECK7-NEXT: [[TMP63:%.*]] = bitcast [10 x i32]* [[C1]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP62]], i8* align 4 [[TMP63]], i64 40, i1 false) +// CHECK7-NEXT: [[TMP64:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* +// CHECK7-NEXT: [[TMP65:%.*]] = bitcast [10 x i32]* [[C1]] to i8* +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP64]], i8* align 8 [[TMP65]], i64 40, i1 false) // CHECK7-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK7: .omp.lastprivate.done: // CHECK7-NEXT: br label [[OMP_PRECOND_END]] // CHECK7: omp.precond.end: -// CHECK7-NEXT: [[TMP64:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 -// CHECK7-NEXT: call void @__kmpc_restore_team_static_memory(i16 1, i16 [[TMP64]]) +// CHECK7-NEXT: [[TMP66:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 +// CHECK7-NEXT: call void @__kmpc_restore_team_static_memory(i16 1, i16 [[TMP66]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK7-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { +// CHECK7-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]], double* nonnull align 8 dereferenceable(8) [[ESCAPED:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -2254,6 +2334,7 @@ // CHECK7-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 // CHECK7-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 8 // CHECK7-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK7-NEXT: [[ESCAPED_ADDR:%.*]] = alloca double*, align 8 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2275,95 +2356,101 @@ // CHECK7-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 // CHECK7-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 8 // CHECK7-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 8 +// CHECK7-NEXT: store double* [[ESCAPED]], double** [[ESCAPED_ADDR]], align 8 // CHECK7-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 // CHECK7-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK7-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 // CHECK7-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 8 // CHECK7-NEXT: [[TMP4:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 8 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK7-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK7-NEXT: [[TMP5:%.*]] = load double*, double** [[ESCAPED_ADDR]], align 8 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK7-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK7-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK7-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK7-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK7-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK7-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK7: omp.precond.then: // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK7-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 -// CHECK7-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK7-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK7-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK7-NEXT: [[CONV:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK7-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK7-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP11]] to i32 // CHECK7-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[B4]] to i8* -// CHECK7-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 40, i1 false) -// CHECK7-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[B4]] to i8* +// CHECK7-NEXT: [[TMP13:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 40, i1 false) +// CHECK7-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP15]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[CONV7:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK7-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK7-NEXT: [[CMP8:%.*]] = icmp ule i64 [[CONV7]], [[TMP17]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[CONV7:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK7-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK7-NEXT: [[CMP8:%.*]] = icmp ule i64 [[CONV7]], [[TMP18]] // CHECK7-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 // CHECK7-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I6]]) #[[ATTR5:[0-9]+]] // CHECK7-NEXT: [[CALL9:%.*]] = call i32 @_Z3fooPi(i32* [[TMP1]]) #[[ATTR5]] // CHECK7-NEXT: [[ADD10:%.*]] = add nsw i32 [[CALL]], [[CALL9]] -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK7-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK7-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B4]], i64 0, i64 [[IDXPROM]] // CHECK7-NEXT: [[CALL11:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX]]) #[[ATTR5]] // CHECK7-NEXT: [[ADD12:%.*]] = add nsw i32 [[ADD10]], [[CALL11]] -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK7-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK7-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP21]] to i64 // CHECK7-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C5]], i64 0, i64 [[IDXPROM13]] // CHECK7-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX14]]) #[[ATTR5]] // CHECK7-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD12]], [[CALL15]] -// CHECK7-NEXT: [[TMP21:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK7-NEXT: [[IDXPROM17:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK7-NEXT: [[IDXPROM17:%.*]] = sext i32 [[TMP22]] to i64 // CHECK7-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i64 0, i64 [[IDXPROM17]] // CHECK7-NEXT: [[CALL19:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX18]]) #[[ATTR5]] // CHECK7-NEXT: [[ADD20:%.*]] = add nsw i32 [[ADD16]], [[CALL19]] -// CHECK7-NEXT: store i32 [[ADD20]], i32* [[TMP1]], align 4 +// CHECK7-NEXT: [[CONV21:%.*]] = sitofp i32 [[ADD20]] to double +// CHECK7-NEXT: [[TMP23:%.*]] = load double, double* [[TMP5]], align 8 +// CHECK7-NEXT: [[ADD22:%.*]] = fadd double [[CONV21]], [[TMP23]] +// CHECK7-NEXT: [[CONV23:%.*]] = fptosi double [[ADD22]] to i32 +// CHECK7-NEXT: store i32 [[CONV23]], i32* [[TMP1]], align 4 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK7-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK7-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK7-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) -// CHECK7-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK7-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK7-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) +// CHECK7-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK7-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK7: .omp.lastprivate.then: -// CHECK7-NEXT: [[TMP28:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* -// CHECK7-NEXT: [[TMP29:%.*]] = bitcast [10 x i32]* [[C5]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i64 40, i1 false) +// CHECK7-NEXT: [[TMP30:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* +// CHECK7-NEXT: [[TMP31:%.*]] = bitcast [10 x i32]* [[C5]] to i8* +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i64 40, i1 false) // CHECK7-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK7: .omp.lastprivate.done: // CHECK7-NEXT: br label [[OMP_PRECOND_END]] @@ -2433,7 +2520,7 @@ // CHECK8-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4 // CHECK8-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x i8*], align 8 +// CHECK8-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [8 x i8*], align 8 // CHECK8-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK8-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK8-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 @@ -2446,9 +2533,11 @@ // CHECK8-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK8-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* // CHECK8-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 8 -// CHECK8-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i64 40, i16 1) +// CHECK8-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i64 48, i16 1) // CHECK8-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct._globalized_locals_ty* -// CHECK8-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP5]], i32 0, i32 0 +// CHECK8-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP5]], i32 0, i32 1 +// CHECK8-NEXT: [[ESCAPED:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], %struct._globalized_locals_ty* [[TMP5]], i32 0, i32 0 +// CHECK8-NEXT: store double 0.000000e+00, double* [[ESCAPED]], align 8 // CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 8 // CHECK8-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -2500,77 +2589,80 @@ // CHECK8-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 // CHECK8-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK8-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 -// CHECK8-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK8-NEXT: [[TMP25:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK8-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP22]] to i8* // CHECK8-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 -// CHECK8-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK8-NEXT: [[TMP27:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 // CHECK8-NEXT: [[TMP28:%.*]] = inttoptr i64 [[TMP24]] to i8* // CHECK8-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 -// CHECK8-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK8-NEXT: [[TMP29:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 // CHECK8-NEXT: [[TMP30:%.*]] = bitcast i32* [[CONV]] to i8* // CHECK8-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 8 -// CHECK8-NEXT: [[TMP31:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 +// CHECK8-NEXT: [[TMP31:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 // CHECK8-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP2]] to i8* // CHECK8-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 8 -// CHECK8-NEXT: [[TMP33:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 +// CHECK8-NEXT: [[TMP33:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 // CHECK8-NEXT: [[TMP34:%.*]] = bitcast [10 x i32]* [[B4]] to i8* // CHECK8-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 8 -// CHECK8-NEXT: [[TMP35:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 5 +// CHECK8-NEXT: [[TMP35:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 5 // CHECK8-NEXT: [[TMP36:%.*]] = bitcast [10 x i32]* [[C1]] to i8* // CHECK8-NEXT: store i8* [[TMP36]], i8** [[TMP35]], align 8 -// CHECK8-NEXT: [[TMP37:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 6 +// CHECK8-NEXT: [[TMP37:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 6 // CHECK8-NEXT: [[TMP38:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* // CHECK8-NEXT: store i8* [[TMP38]], i8** [[TMP37]], align 8 -// CHECK8-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK8-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP39]], 0 -// CHECK8-NEXT: [[TMP40:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK8-NEXT: [[TMP41:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP42:%.*]] = load i32, i32* [[TMP41]], align 4 -// CHECK8-NEXT: [[TMP43:%.*]] = bitcast [7 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK8-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP42]], i32 [[TMP40]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP43]], i64 7) +// CHECK8-NEXT: [[TMP39:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 7 +// CHECK8-NEXT: [[TMP40:%.*]] = bitcast double* [[ESCAPED]] to i8* +// CHECK8-NEXT: store i8* [[TMP40]], i8** [[TMP39]], align 8 +// CHECK8-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK8-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK8-NEXT: [[TMP42:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK8-NEXT: [[TMP43:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 +// CHECK8-NEXT: [[TMP45:%.*]] = bitcast [8 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK8-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP44]], i32 [[TMP42]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*, double*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP45]], i64 8) // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK8-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] -// CHECK8-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK8-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK8-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP46]], [[TMP47]] -// CHECK8-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK8-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP46]], [[TMP47]] +// CHECK8-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK8-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK8-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP48]], [[TMP49]] -// CHECK8-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP48]], [[TMP49]] +// CHECK8-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 // CHECK8-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK8-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP50]], [[TMP51]] +// CHECK8-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK8-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP50]], [[TMP51]] +// CHECK8-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK8-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP52]], [[TMP53]] // CHECK8-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] // CHECK8: cond.true12: -// CHECK8-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK8-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK8-NEXT: br label [[COND_END14:%.*]] // CHECK8: cond.false13: -// CHECK8-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK8-NEXT: br label [[COND_END14]] // CHECK8: cond.end14: -// CHECK8-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP52]], [[COND_TRUE12]] ], [ [[TMP53]], [[COND_FALSE13]] ] +// CHECK8-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP54]], [[COND_TRUE12]] ], [ [[TMP55]], [[COND_FALSE13]] ] // CHECK8-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK8-NEXT: store i32 [[TMP54]], i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP56:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK8-NEXT: store i32 [[TMP56]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: -// CHECK8-NEXT: [[TMP55:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP56]]) -// CHECK8-NEXT: [[TMP57:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: [[TMP58:%.*]] = icmp ne i32 [[TMP57]], 0 -// CHECK8-NEXT: br i1 [[TMP58]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK8-NEXT: [[TMP57:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP58:%.*]] = load i32, i32* [[TMP57]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP58]]) +// CHECK8-NEXT: [[TMP59:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK8-NEXT: [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0 +// CHECK8-NEXT: br i1 [[TMP60]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK8: .omp.lastprivate.then: -// CHECK8-NEXT: [[TMP59:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* -// CHECK8-NEXT: [[TMP60:%.*]] = bitcast [10 x i32]* [[C1]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP59]], i8* align 4 [[TMP60]], i64 40, i1 false) +// CHECK8-NEXT: [[TMP61:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* +// CHECK8-NEXT: [[TMP62:%.*]] = bitcast [10 x i32]* [[C1]] to i8* +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP61]], i8* align 8 [[TMP62]], i64 40, i1 false) // CHECK8-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK8: .omp.lastprivate.done: // CHECK8-NEXT: br label [[OMP_PRECOND_END]] @@ -2580,7 +2672,7 @@ // // // CHECK8-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK8-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { +// CHECK8-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]], double* nonnull align 8 dereferenceable(8) [[ESCAPED:%.*]]) #[[ATTR0]] { // CHECK8-NEXT: entry: // CHECK8-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK8-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -2591,6 +2683,7 @@ // CHECK8-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 // CHECK8-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 8 // CHECK8-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK8-NEXT: [[ESCAPED_ADDR:%.*]] = alloca double*, align 8 // CHECK8-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2612,95 +2705,101 @@ // CHECK8-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 // CHECK8-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 8 // CHECK8-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 8 +// CHECK8-NEXT: store double* [[ESCAPED]], double** [[ESCAPED_ADDR]], align 8 // CHECK8-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 // CHECK8-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK8-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 // CHECK8-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 8 // CHECK8-NEXT: [[TMP4:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 8 -// CHECK8-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK8-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK8-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK8-NEXT: [[TMP5:%.*]] = load double*, double** [[ESCAPED_ADDR]], align 8 +// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK8-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK8-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK8-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK8-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK8-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK8-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK8-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK8: omp.precond.then: // CHECK8-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK8-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK8-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 -// CHECK8-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK8-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK8-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK8-NEXT: [[CONV:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK8-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK8-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP11]] to i32 // CHECK8-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK8-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 // CHECK8-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK8-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[B4]] to i8* -// CHECK8-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 40, i1 false) -// CHECK8-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK8-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[B4]] to i8* +// CHECK8-NEXT: [[TMP13:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 40, i1 false) +// CHECK8-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP15]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK8-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[CONV7:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK8-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK8-NEXT: [[CMP8:%.*]] = icmp ule i64 [[CONV7]], [[TMP17]] +// CHECK8-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[CONV7:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK8-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK8-NEXT: [[CMP8:%.*]] = icmp ule i64 [[CONV7]], [[TMP18]] // CHECK8-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK8-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK8-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 // CHECK8-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I6]]) #[[ATTR5:[0-9]+]] // CHECK8-NEXT: [[CALL9:%.*]] = call i32 @_Z3fooPi(i32* [[TMP1]]) #[[ATTR5]] // CHECK8-NEXT: [[ADD10:%.*]] = add nsw i32 [[CALL]], [[CALL9]] -// CHECK8-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK8-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK8-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK8-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B4]], i64 0, i64 [[IDXPROM]] // CHECK8-NEXT: [[CALL11:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX]]) #[[ATTR5]] // CHECK8-NEXT: [[ADD12:%.*]] = add nsw i32 [[ADD10]], [[CALL11]] -// CHECK8-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK8-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK8-NEXT: [[TMP21:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK8-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP21]] to i64 // CHECK8-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C5]], i64 0, i64 [[IDXPROM13]] // CHECK8-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX14]]) #[[ATTR5]] // CHECK8-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD12]], [[CALL15]] -// CHECK8-NEXT: [[TMP21:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK8-NEXT: [[IDXPROM17:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK8-NEXT: [[IDXPROM17:%.*]] = sext i32 [[TMP22]] to i64 // CHECK8-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i64 0, i64 [[IDXPROM17]] // CHECK8-NEXT: [[CALL19:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX18]]) #[[ATTR5]] // CHECK8-NEXT: [[ADD20:%.*]] = add nsw i32 [[ADD16]], [[CALL19]] -// CHECK8-NEXT: store i32 [[ADD20]], i32* [[TMP1]], align 4 +// CHECK8-NEXT: [[CONV21:%.*]] = sitofp i32 [[ADD20]] to double +// CHECK8-NEXT: [[TMP23:%.*]] = load double, double* [[TMP5]], align 8 +// CHECK8-NEXT: [[ADD22:%.*]] = fadd double [[CONV21]], [[TMP23]] +// CHECK8-NEXT: [[CONV23:%.*]] = fptosi double [[ADD22]] to i32 +// CHECK8-NEXT: store i32 [[CONV23]], i32* [[TMP1]], align 4 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK8-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK8-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK8-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK8-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: -// CHECK8-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) -// CHECK8-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK8-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK8-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) +// CHECK8-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK8-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK8-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK8: .omp.lastprivate.then: -// CHECK8-NEXT: [[TMP28:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* -// CHECK8-NEXT: [[TMP29:%.*]] = bitcast [10 x i32]* [[C5]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i64 40, i1 false) +// CHECK8-NEXT: [[TMP30:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* +// CHECK8-NEXT: [[TMP31:%.*]] = bitcast [10 x i32]* [[C5]] to i8* +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i64 40, i1 false) // CHECK8-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK8: .omp.lastprivate.done: // CHECK8-NEXT: br label [[OMP_PRECOND_END]] @@ -2768,7 +2867,7 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4 // CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x i8*], align 4 +// CHECK9-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [8 x i8*], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK9-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 @@ -2786,7 +2885,9 @@ // CHECK9-NEXT: [[TMP6:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 4 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i32 0 // CHECK9-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct._globalized_locals_ty* -// CHECK9-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP8]], i32 0, i32 0 +// CHECK9-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP8]], i32 0, i32 1 +// CHECK9-NEXT: [[ESCAPED:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], %struct._globalized_locals_ty* [[TMP8]], i32 0, i32 0 +// CHECK9-NEXT: store double 0.000000e+00, double* [[ESCAPED]], align 8 // CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -2836,88 +2937,91 @@ // CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP27:%.*]] = inttoptr i32 [[TMP24]] to i8* // CHECK9-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 4 -// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK9-NEXT: [[TMP29:%.*]] = inttoptr i32 [[TMP25]] to i8* // CHECK9-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK9-NEXT: [[TMP31:%.*]] = bitcast i32* [[ARGC_ADDR]] to i8* // CHECK9-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 // CHECK9-NEXT: [[TMP33:%.*]] = bitcast i32* [[TMP2]] to i8* // CHECK9-NEXT: store i8* [[TMP33]], i8** [[TMP32]], align 4 -// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 // CHECK9-NEXT: [[TMP35:%.*]] = bitcast [10 x i32]* [[B4]] to i8* // CHECK9-NEXT: store i8* [[TMP35]], i8** [[TMP34]], align 4 -// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 5 // CHECK9-NEXT: [[TMP37:%.*]] = bitcast [10 x i32]* [[C1]] to i8* // CHECK9-NEXT: store i8* [[TMP37]], i8** [[TMP36]], align 4 -// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 6 // CHECK9-NEXT: [[TMP39:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* // CHECK9-NEXT: store i8* [[TMP39]], i8** [[TMP38]], align 4 -// CHECK9-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP40]], 0 -// CHECK9-NEXT: [[TMP41:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK9-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK9-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 -// CHECK9-NEXT: [[TMP44:%.*]] = bitcast [7 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK9-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP43]], i32 [[TMP41]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP44]], i32 7) +// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 7 +// CHECK9-NEXT: [[TMP41:%.*]] = bitcast double* [[ESCAPED]] to i8* +// CHECK9-NEXT: store i8* [[TMP41]], i8** [[TMP40]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP42]], 0 +// CHECK9-NEXT: [[TMP43:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 +// CHECK9-NEXT: [[TMP46:%.*]] = bitcast [8 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK9-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP45]], i32 [[TMP43]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*, double*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP46]], i32 8) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] -// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP47]], [[TMP48]] -// CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP47]], [[TMP48]] +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP49]], [[TMP50]] -// CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP49]], [[TMP50]] +// CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP51]], [[TMP52]] +// CHECK9-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP51]], [[TMP52]] +// CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP53]], [[TMP54]] // CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] // CHECK9: cond.true12: -// CHECK9-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END14:%.*]] // CHECK9: cond.false13: -// CHECK9-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP56:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END14]] // CHECK9: cond.end14: -// CHECK9-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP53]], [[COND_TRUE12]] ], [ [[TMP54]], [[COND_FALSE13]] ] +// CHECK9-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP55]], [[COND_TRUE12]] ], [ [[TMP56]], [[COND_FALSE13]] ] // CHECK9-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP55]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP57:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP57]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP56:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK9-NEXT: [[TMP57:%.*]] = load i32, i32* [[TMP56]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP57]]) -// CHECK9-NEXT: [[TMP58:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP59:%.*]] = icmp ne i32 [[TMP58]], 0 -// CHECK9-NEXT: br i1 [[TMP59]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP58:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK9-NEXT: [[TMP59:%.*]] = load i32, i32* [[TMP58]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP59]]) +// CHECK9-NEXT: [[TMP60:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0 +// CHECK9-NEXT: br i1 [[TMP61]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP60:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* -// CHECK9-NEXT: [[TMP61:%.*]] = bitcast [10 x i32]* [[C1]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP60]], i8* align 4 [[TMP61]], i32 40, i1 false) +// CHECK9-NEXT: [[TMP62:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* +// CHECK9-NEXT: [[TMP63:%.*]] = bitcast [10 x i32]* [[C1]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP62]], i8* align 8 [[TMP63]], i32 40, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: -// CHECK9-NEXT: [[TMP62:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 -// CHECK9-NEXT: call void @__kmpc_restore_team_static_memory(i16 1, i16 [[TMP62]]) +// CHECK9-NEXT: [[TMP64:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 +// CHECK9-NEXT: call void @__kmpc_restore_team_static_memory(i16 1, i16 [[TMP64]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]], double* nonnull align 8 dereferenceable(8) [[ESCAPED:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 @@ -2928,6 +3032,7 @@ // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK9-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK9-NEXT: [[ESCAPED_ADDR:%.*]] = alloca double*, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2949,89 +3054,95 @@ // CHECK9-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK9-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 // CHECK9-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 +// CHECK9-NEXT: store double* [[ESCAPED]], double** [[ESCAPED_ADDR]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK9-NEXT: [[TMP5:%.*]] = load double*, double** [[ESCAPED_ADDR]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[B3]] to i8* -// CHECK9-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 40, i1 false) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[B3]] to i8* +// CHECK9-NEXT: [[TMP13:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 40, i1 false) +// CHECK9-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP15]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp ule i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 // CHECK9-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I5]]) #[[ATTR5:[0-9]+]] // CHECK9-NEXT: [[CALL7:%.*]] = call i32 @_Z3fooPi(i32* [[TMP1]]) #[[ATTR5]] // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B3]], i32 0, i32 [[TMP19]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B3]], i32 0, i32 [[TMP20]] // CHECK9-NEXT: [[CALL9:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX]]) #[[ATTR5]] // CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]] -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C4]], i32 0, i32 [[TMP20]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C4]], i32 0, i32 [[TMP21]] // CHECK9-NEXT: [[CALL12:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX11]]) #[[ATTR5]] // CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD10]], [[CALL12]] -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK9-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i32 0, i32 [[TMP21]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK9-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i32 0, i32 [[TMP22]] // CHECK9-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX14]]) #[[ATTR5]] // CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD13]], [[CALL15]] -// CHECK9-NEXT: store i32 [[ADD16]], i32* [[TMP1]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[ADD16]] to double +// CHECK9-NEXT: [[TMP23:%.*]] = load double, double* [[TMP5]], align 8 +// CHECK9-NEXT: [[ADD17:%.*]] = fadd double [[CONV]], [[TMP23]] +// CHECK9-NEXT: [[CONV18:%.*]] = fptosi double [[ADD17]] to i32 +// CHECK9-NEXT: store i32 [[CONV18]], i32* [[TMP1]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK9-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK9-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK9-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK9-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP28:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* -// CHECK9-NEXT: [[TMP29:%.*]] = bitcast [10 x i32]* [[C4]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i32 40, i1 false) +// CHECK9-NEXT: [[TMP30:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* +// CHECK9-NEXT: [[TMP31:%.*]] = bitcast [10 x i32]* [[C4]] to i8* +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i32 40, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -3099,7 +3210,7 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4 // CHECK10-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x i8*], align 4 +// CHECK10-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [8 x i8*], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK10-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 @@ -3117,7 +3228,9 @@ // CHECK10-NEXT: [[TMP6:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 4 // CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i32 0 // CHECK10-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct._globalized_locals_ty* -// CHECK10-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP8]], i32 0, i32 0 +// CHECK10-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP8]], i32 0, i32 1 +// CHECK10-NEXT: [[ESCAPED:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], %struct._globalized_locals_ty* [[TMP8]], i32 0, i32 0 +// CHECK10-NEXT: store double 0.000000e+00, double* [[ESCAPED]], align 8 // CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK10-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -3167,88 +3280,91 @@ // CHECK10: omp.inner.for.body: // CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP26:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP26:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK10-NEXT: [[TMP27:%.*]] = inttoptr i32 [[TMP24]] to i8* // CHECK10-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 4 -// CHECK10-NEXT: [[TMP28:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP28:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK10-NEXT: [[TMP29:%.*]] = inttoptr i32 [[TMP25]] to i8* // CHECK10-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 4 -// CHECK10-NEXT: [[TMP30:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP30:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK10-NEXT: [[TMP31:%.*]] = bitcast i32* [[ARGC_ADDR]] to i8* // CHECK10-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 4 -// CHECK10-NEXT: [[TMP32:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP32:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 // CHECK10-NEXT: [[TMP33:%.*]] = bitcast i32* [[TMP2]] to i8* // CHECK10-NEXT: store i8* [[TMP33]], i8** [[TMP32]], align 4 -// CHECK10-NEXT: [[TMP34:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 +// CHECK10-NEXT: [[TMP34:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 // CHECK10-NEXT: [[TMP35:%.*]] = bitcast [10 x i32]* [[B4]] to i8* // CHECK10-NEXT: store i8* [[TMP35]], i8** [[TMP34]], align 4 -// CHECK10-NEXT: [[TMP36:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 5 +// CHECK10-NEXT: [[TMP36:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 5 // CHECK10-NEXT: [[TMP37:%.*]] = bitcast [10 x i32]* [[C1]] to i8* // CHECK10-NEXT: store i8* [[TMP37]], i8** [[TMP36]], align 4 -// CHECK10-NEXT: [[TMP38:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 6 +// CHECK10-NEXT: [[TMP38:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 6 // CHECK10-NEXT: [[TMP39:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* // CHECK10-NEXT: store i8* [[TMP39]], i8** [[TMP38]], align 4 -// CHECK10-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK10-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP40]], 0 -// CHECK10-NEXT: [[TMP41:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK10-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 -// CHECK10-NEXT: [[TMP44:%.*]] = bitcast [7 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK10-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP43]], i32 [[TMP41]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP44]], i32 7) +// CHECK10-NEXT: [[TMP40:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 7 +// CHECK10-NEXT: [[TMP41:%.*]] = bitcast double* [[ESCAPED]] to i8* +// CHECK10-NEXT: store i8* [[TMP41]], i8** [[TMP40]], align 4 +// CHECK10-NEXT: [[TMP42:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK10-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP42]], 0 +// CHECK10-NEXT: [[TMP43:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK10-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 +// CHECK10-NEXT: [[TMP46:%.*]] = bitcast [8 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK10-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP45]], i32 [[TMP43]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*, double*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP46]], i32 8) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] -// CHECK10-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP47]], [[TMP48]] -// CHECK10-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP47]], [[TMP48]] +// CHECK10-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK10-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP49]], [[TMP50]] -// CHECK10-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP49]], [[TMP50]] +// CHECK10-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 // CHECK10-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK10-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP51]], [[TMP52]] +// CHECK10-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP51]], [[TMP52]] +// CHECK10-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK10-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP53]], [[TMP54]] // CHECK10-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] // CHECK10: cond.true12: -// CHECK10-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK10-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK10-NEXT: br label [[COND_END14:%.*]] // CHECK10: cond.false13: -// CHECK10-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP56:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: br label [[COND_END14]] // CHECK10: cond.end14: -// CHECK10-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP53]], [[COND_TRUE12]] ], [ [[TMP54]], [[COND_FALSE13]] ] +// CHECK10-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP55]], [[COND_TRUE12]] ], [ [[TMP56]], [[COND_FALSE13]] ] // CHECK10-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP55]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP57:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP57]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP56:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP57:%.*]] = load i32, i32* [[TMP56]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP57]]) -// CHECK10-NEXT: [[TMP58:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP59:%.*]] = icmp ne i32 [[TMP58]], 0 -// CHECK10-NEXT: br i1 [[TMP59]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK10-NEXT: [[TMP58:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP59:%.*]] = load i32, i32* [[TMP58]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP59]]) +// CHECK10-NEXT: [[TMP60:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK10-NEXT: [[TMP61:%.*]] = icmp ne i32 [[TMP60]], 0 +// CHECK10-NEXT: br i1 [[TMP61]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK10: .omp.lastprivate.then: -// CHECK10-NEXT: [[TMP60:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* -// CHECK10-NEXT: [[TMP61:%.*]] = bitcast [10 x i32]* [[C1]] to i8* -// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP60]], i8* align 4 [[TMP61]], i32 40, i1 false) +// CHECK10-NEXT: [[TMP62:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* +// CHECK10-NEXT: [[TMP63:%.*]] = bitcast [10 x i32]* [[C1]] to i8* +// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP62]], i8* align 8 [[TMP63]], i32 40, i1 false) // CHECK10-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK10: .omp.lastprivate.done: // CHECK10-NEXT: br label [[OMP_PRECOND_END]] // CHECK10: omp.precond.end: -// CHECK10-NEXT: [[TMP62:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 -// CHECK10-NEXT: call void @__kmpc_restore_team_static_memory(i16 1, i16 [[TMP62]]) +// CHECK10-NEXT: [[TMP64:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 +// CHECK10-NEXT: call void @__kmpc_restore_team_static_memory(i16 1, i16 [[TMP64]]) // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK10-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { +// CHECK10-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]], double* nonnull align 8 dereferenceable(8) [[ESCAPED:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 @@ -3259,6 +3375,7 @@ // CHECK10-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK10-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK10-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK10-NEXT: [[ESCAPED_ADDR:%.*]] = alloca double*, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3280,89 +3397,95 @@ // CHECK10-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK10-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 // CHECK10-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 +// CHECK10-NEXT: store double* [[ESCAPED]], double** [[ESCAPED_ADDR]], align 4 // CHECK10-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK10-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK10-NEXT: [[TMP5:%.*]] = load double*, double** [[ESCAPED_ADDR]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK10-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK10-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK10-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK10-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK10-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK10: omp.precond.then: // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK10-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK10-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK10-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[B3]] to i8* -// CHECK10-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* -// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 40, i1 false) -// CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[B3]] to i8* +// CHECK10-NEXT: [[TMP13:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* +// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 40, i1 false) +// CHECK10-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP15]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK10-NEXT: [[CMP6:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK10-NEXT: [[CMP6:%.*]] = icmp ule i32 [[TMP17]], [[TMP18]] // CHECK10-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 // CHECK10-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I5]]) #[[ATTR5:[0-9]+]] // CHECK10-NEXT: [[CALL7:%.*]] = call i32 @_Z3fooPi(i32* [[TMP1]]) #[[ATTR5]] // CHECK10-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]] -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B3]], i32 0, i32 [[TMP19]] +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B3]], i32 0, i32 [[TMP20]] // CHECK10-NEXT: [[CALL9:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX]]) #[[ATTR5]] // CHECK10-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]] -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK10-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C4]], i32 0, i32 [[TMP20]] +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK10-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C4]], i32 0, i32 [[TMP21]] // CHECK10-NEXT: [[CALL12:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX11]]) #[[ATTR5]] // CHECK10-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD10]], [[CALL12]] -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK10-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i32 0, i32 [[TMP21]] +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK10-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i32 0, i32 [[TMP22]] // CHECK10-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX14]]) #[[ATTR5]] // CHECK10-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD13]], [[CALL15]] -// CHECK10-NEXT: store i32 [[ADD16]], i32* [[TMP1]], align 4 +// CHECK10-NEXT: [[CONV:%.*]] = sitofp i32 [[ADD16]] to double +// CHECK10-NEXT: [[TMP23:%.*]] = load double, double* [[TMP5]], align 8 +// CHECK10-NEXT: [[ADD17:%.*]] = fadd double [[CONV]], [[TMP23]] +// CHECK10-NEXT: [[CONV18:%.*]] = fptosi double [[ADD17]] to i32 +// CHECK10-NEXT: store i32 [[CONV18]], i32* [[TMP1]], align 4 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK10-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK10-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) -// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK10-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK10-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK10-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK10-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK10: .omp.lastprivate.then: -// CHECK10-NEXT: [[TMP28:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* -// CHECK10-NEXT: [[TMP29:%.*]] = bitcast [10 x i32]* [[C4]] to i8* -// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i32 40, i1 false) +// CHECK10-NEXT: [[TMP30:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* +// CHECK10-NEXT: [[TMP31:%.*]] = bitcast [10 x i32]* [[C4]] to i8* +// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i32 40, i1 false) // CHECK10-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK10: .omp.lastprivate.done: // CHECK10-NEXT: br label [[OMP_PRECOND_END]] @@ -3430,7 +3553,7 @@ // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4 // CHECK11-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x i8*], align 4 +// CHECK11-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [8 x i8*], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK11-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 @@ -3442,9 +3565,11 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 40, i16 1) +// CHECK11-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 48, i16 1) // CHECK11-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct._globalized_locals_ty* -// CHECK11-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP5]], i32 0, i32 0 +// CHECK11-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP5]], i32 0, i32 1 +// CHECK11-NEXT: [[ESCAPED:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], %struct._globalized_locals_ty* [[TMP5]], i32 0, i32 0 +// CHECK11-NEXT: store double 0.000000e+00, double* [[ESCAPED]], align 8 // CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -3494,77 +3619,80 @@ // CHECK11: omp.inner.for.body: // CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP21]] to i8* // CHECK11-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK11-NEXT: [[TMP26:%.*]] = inttoptr i32 [[TMP22]] to i8* // CHECK11-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK11-NEXT: [[TMP28:%.*]] = bitcast i32* [[ARGC_ADDR]] to i8* // CHECK11-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 // CHECK11-NEXT: [[TMP30:%.*]] = bitcast i32* [[TMP2]] to i8* // CHECK11-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 // CHECK11-NEXT: [[TMP32:%.*]] = bitcast [10 x i32]* [[B4]] to i8* // CHECK11-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 5 // CHECK11-NEXT: [[TMP34:%.*]] = bitcast [10 x i32]* [[C1]] to i8* // CHECK11-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 4 -// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 6 // CHECK11-NEXT: [[TMP36:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* // CHECK11-NEXT: store i8* [[TMP36]], i8** [[TMP35]], align 4 -// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP37]], 0 -// CHECK11-NEXT: [[TMP38:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK11-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 -// CHECK11-NEXT: [[TMP41:%.*]] = bitcast [7 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK11-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP40]], i32 [[TMP38]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP41]], i32 7) +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 7 +// CHECK11-NEXT: [[TMP38:%.*]] = bitcast double* [[ESCAPED]] to i8* +// CHECK11-NEXT: store i8* [[TMP38]], i8** [[TMP37]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK11-NEXT: [[TMP40:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[TMP41]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = bitcast [8 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK11-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP42]], i32 [[TMP40]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*, double*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP43]], i32 8) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] -// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] -// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] +// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP46]], [[TMP47]] -// CHECK11-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP46]], [[TMP47]] +// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP48]], [[TMP49]] +// CHECK11-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP48]], [[TMP49]] +// CHECK11-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP50]], [[TMP51]] // CHECK11-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] // CHECK11: cond.true12: -// CHECK11-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END14:%.*]] // CHECK11: cond.false13: -// CHECK11-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END14]] // CHECK11: cond.end14: -// CHECK11-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP50]], [[COND_TRUE12]] ], [ [[TMP51]], [[COND_FALSE13]] ] +// CHECK11-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP52]], [[COND_TRUE12]] ], [ [[TMP53]], [[COND_FALSE13]] ] // CHECK11-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP52]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP54]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP54]]) -// CHECK11-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP56:%.*]] = icmp ne i32 [[TMP55]], 0 -// CHECK11-NEXT: br i1 [[TMP56]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP55:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP56]]) +// CHECK11-NEXT: [[TMP57:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP58:%.*]] = icmp ne i32 [[TMP57]], 0 +// CHECK11-NEXT: br i1 [[TMP58]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP57:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* -// CHECK11-NEXT: [[TMP58:%.*]] = bitcast [10 x i32]* [[C1]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP57]], i8* align 4 [[TMP58]], i32 40, i1 false) +// CHECK11-NEXT: [[TMP59:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* +// CHECK11-NEXT: [[TMP60:%.*]] = bitcast [10 x i32]* [[C1]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP59]], i8* align 8 [[TMP60]], i32 40, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: // CHECK11-NEXT: br label [[OMP_PRECOND_END]] @@ -3574,7 +3702,7 @@ // // // CHECK11-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]], double* nonnull align 8 dereferenceable(8) [[ESCAPED:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 @@ -3585,6 +3713,7 @@ // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK11-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[ESCAPED_ADDR:%.*]] = alloca double*, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3606,89 +3735,95 @@ // CHECK11-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK11-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 // CHECK11-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 +// CHECK11-NEXT: store double* [[ESCAPED]], double** [[ESCAPED_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 // CHECK11-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 // CHECK11-NEXT: [[TMP4:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK11-NEXT: [[TMP5:%.*]] = load double*, double** [[ESCAPED_ADDR]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[B3]] to i8* -// CHECK11-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 40, i1 false) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[B3]] to i8* +// CHECK11-NEXT: [[TMP13:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 40, i1 false) +// CHECK11-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP15]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp ule i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 // CHECK11-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I5]]) #[[ATTR5:[0-9]+]] // CHECK11-NEXT: [[CALL7:%.*]] = call i32 @_Z3fooPi(i32* [[TMP1]]) #[[ATTR5]] // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B3]], i32 0, i32 [[TMP19]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B3]], i32 0, i32 [[TMP20]] // CHECK11-NEXT: [[CALL9:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX]]) #[[ATTR5]] // CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK11-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C4]], i32 0, i32 [[TMP20]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK11-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C4]], i32 0, i32 [[TMP21]] // CHECK11-NEXT: [[CALL12:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX11]]) #[[ATTR5]] // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD10]], [[CALL12]] -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK11-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i32 0, i32 [[TMP21]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK11-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i32 0, i32 [[TMP22]] // CHECK11-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX14]]) #[[ATTR5]] // CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD13]], [[CALL15]] -// CHECK11-NEXT: store i32 [[ADD16]], i32* [[TMP1]], align 4 +// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[ADD16]] to double +// CHECK11-NEXT: [[TMP23:%.*]] = load double, double* [[TMP5]], align 8 +// CHECK11-NEXT: [[ADD17:%.*]] = fadd double [[CONV]], [[TMP23]] +// CHECK11-NEXT: [[CONV18:%.*]] = fptosi double [[ADD17]] to i32 +// CHECK11-NEXT: store i32 [[CONV18]], i32* [[TMP1]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK11-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK11-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK11-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK11-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP28:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* -// CHECK11-NEXT: [[TMP29:%.*]] = bitcast [10 x i32]* [[C4]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i32 40, i1 false) +// CHECK11-NEXT: [[TMP30:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* +// CHECK11-NEXT: [[TMP31:%.*]] = bitcast [10 x i32]* [[C4]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i32 40, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: // CHECK11-NEXT: br label [[OMP_PRECOND_END]] @@ -3756,7 +3891,7 @@ // CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4 // CHECK12-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x i8*], align 4 +// CHECK12-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [8 x i8*], align 4 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK12-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 @@ -3768,9 +3903,11 @@ // CHECK12-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 // CHECK12-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK12-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 -// CHECK12-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 40, i16 1) +// CHECK12-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 48, i16 1) // CHECK12-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct._globalized_locals_ty* -// CHECK12-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP5]], i32 0, i32 0 +// CHECK12-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP5]], i32 0, i32 1 +// CHECK12-NEXT: [[ESCAPED:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], %struct._globalized_locals_ty* [[TMP5]], i32 0, i32 0 +// CHECK12-NEXT: store double 0.000000e+00, double* [[ESCAPED]], align 8 // CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -3820,77 +3957,80 @@ // CHECK12: omp.inner.for.body: // CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP23:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK12-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP21]] to i8* // CHECK12-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 4 -// CHECK12-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP25:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 // CHECK12-NEXT: [[TMP26:%.*]] = inttoptr i32 [[TMP22]] to i8* // CHECK12-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 4 -// CHECK12-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK12-NEXT: [[TMP27:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK12-NEXT: [[TMP28:%.*]] = bitcast i32* [[ARGC_ADDR]] to i8* // CHECK12-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 4 -// CHECK12-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK12-NEXT: [[TMP29:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 // CHECK12-NEXT: [[TMP30:%.*]] = bitcast i32* [[TMP2]] to i8* // CHECK12-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 4 -// CHECK12-NEXT: [[TMP31:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 +// CHECK12-NEXT: [[TMP31:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 // CHECK12-NEXT: [[TMP32:%.*]] = bitcast [10 x i32]* [[B4]] to i8* // CHECK12-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 4 -// CHECK12-NEXT: [[TMP33:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 5 +// CHECK12-NEXT: [[TMP33:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 5 // CHECK12-NEXT: [[TMP34:%.*]] = bitcast [10 x i32]* [[C1]] to i8* // CHECK12-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 4 -// CHECK12-NEXT: [[TMP35:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 6 +// CHECK12-NEXT: [[TMP35:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 6 // CHECK12-NEXT: [[TMP36:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* // CHECK12-NEXT: store i8* [[TMP36]], i8** [[TMP35]], align 4 -// CHECK12-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK12-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP37]], 0 -// CHECK12-NEXT: [[TMP38:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK12-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 -// CHECK12-NEXT: [[TMP41:%.*]] = bitcast [7 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK12-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP40]], i32 [[TMP38]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP41]], i32 7) +// CHECK12-NEXT: [[TMP37:%.*]] = getelementptr inbounds [8 x i8*], [8 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 7 +// CHECK12-NEXT: [[TMP38:%.*]] = bitcast double* [[ESCAPED]] to i8* +// CHECK12-NEXT: store i8* [[TMP38]], i8** [[TMP37]], align 4 +// CHECK12-NEXT: [[TMP39:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK12-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK12-NEXT: [[TMP40:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK12-NEXT: [[TMP41:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP42:%.*]] = load i32, i32* [[TMP41]], align 4 +// CHECK12-NEXT: [[TMP43:%.*]] = bitcast [8 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK12-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP42]], i32 [[TMP40]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*, double*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP43]], i32 8) // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] -// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] -// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] +// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK12-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP46]], [[TMP47]] -// CHECK12-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP46]], [[TMP47]] +// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 // CHECK12-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK12-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP48]], [[TMP49]] +// CHECK12-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP48]], [[TMP49]] +// CHECK12-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK12-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP50]], [[TMP51]] // CHECK12-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] // CHECK12: cond.true12: -// CHECK12-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK12-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK12-NEXT: br label [[COND_END14:%.*]] // CHECK12: cond.false13: -// CHECK12-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK12-NEXT: br label [[COND_END14]] // CHECK12: cond.end14: -// CHECK12-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP50]], [[COND_TRUE12]] ], [ [[TMP51]], [[COND_FALSE13]] ] +// CHECK12-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP52]], [[COND_TRUE12]] ], [ [[TMP53]], [[COND_FALSE13]] ] // CHECK12-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP52]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP54]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: -// CHECK12-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP54]]) -// CHECK12-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP56:%.*]] = icmp ne i32 [[TMP55]], 0 -// CHECK12-NEXT: br i1 [[TMP56]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK12-NEXT: [[TMP55:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP56]]) +// CHECK12-NEXT: [[TMP57:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK12-NEXT: [[TMP58:%.*]] = icmp ne i32 [[TMP57]], 0 +// CHECK12-NEXT: br i1 [[TMP58]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK12: .omp.lastprivate.then: -// CHECK12-NEXT: [[TMP57:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* -// CHECK12-NEXT: [[TMP58:%.*]] = bitcast [10 x i32]* [[C1]] to i8* -// CHECK12-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP57]], i8* align 4 [[TMP58]], i32 40, i1 false) +// CHECK12-NEXT: [[TMP59:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* +// CHECK12-NEXT: [[TMP60:%.*]] = bitcast [10 x i32]* [[C1]] to i8* +// CHECK12-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP59]], i8* align 8 [[TMP60]], i32 40, i1 false) // CHECK12-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK12: .omp.lastprivate.done: // CHECK12-NEXT: br label [[OMP_PRECOND_END]] @@ -3900,7 +4040,7 @@ // // // CHECK12-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { +// CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]], double* nonnull align 8 dereferenceable(8) [[ESCAPED:%.*]]) #[[ATTR0]] { // CHECK12-NEXT: entry: // CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 @@ -3911,6 +4051,7 @@ // CHECK12-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK12-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK12-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK12-NEXT: [[ESCAPED_ADDR:%.*]] = alloca double*, align 4 // CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3932,89 +4073,95 @@ // CHECK12-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK12-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 // CHECK12-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 +// CHECK12-NEXT: store double* [[ESCAPED]], double** [[ESCAPED_ADDR]], align 4 // CHECK12-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4 // CHECK12-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 // CHECK12-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 // CHECK12-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 // CHECK12-NEXT: [[TMP4:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK12-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK12-NEXT: [[TMP5:%.*]] = load double*, double** [[ESCAPED_ADDR]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK12-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK12-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK12-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK12-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK12: omp.precond.then: // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_UB]], align 4 // CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[B3]] to i8* -// CHECK12-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* -// CHECK12-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 40, i1 false) -// CHECK12-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[B3]] to i8* +// CHECK12-NEXT: [[TMP13:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* +// CHECK12-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 40, i1 false) +// CHECK12-NEXT: [[TMP14:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP15]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: [[CMP6:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK12-NEXT: [[CMP6:%.*]] = icmp ule i32 [[TMP17]], [[TMP18]] // CHECK12-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK12-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 // CHECK12-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I5]]) #[[ATTR5:[0-9]+]] // CHECK12-NEXT: [[CALL7:%.*]] = call i32 @_Z3fooPi(i32* [[TMP1]]) #[[ATTR5]] // CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]] -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B3]], i32 0, i32 [[TMP19]] +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B3]], i32 0, i32 [[TMP20]] // CHECK12-NEXT: [[CALL9:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX]]) #[[ATTR5]] // CHECK12-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]] -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK12-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C4]], i32 0, i32 [[TMP20]] +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK12-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C4]], i32 0, i32 [[TMP21]] // CHECK12-NEXT: [[CALL12:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX11]]) #[[ATTR5]] // CHECK12-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD10]], [[CALL12]] -// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK12-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i32 0, i32 [[TMP21]] +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK12-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i32 0, i32 [[TMP22]] // CHECK12-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX14]]) #[[ATTR5]] // CHECK12-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD13]], [[CALL15]] -// CHECK12-NEXT: store i32 [[ADD16]], i32* [[TMP1]], align 4 +// CHECK12-NEXT: [[CONV:%.*]] = sitofp i32 [[ADD16]] to double +// CHECK12-NEXT: [[TMP23:%.*]] = load double, double* [[TMP5]], align 8 +// CHECK12-NEXT: [[ADD17:%.*]] = fadd double [[CONV]], [[TMP23]] +// CHECK12-NEXT: [[CONV18:%.*]] = fptosi double [[ADD17]] to i32 +// CHECK12-NEXT: store i32 [[CONV18]], i32* [[TMP1]], align 4 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK12-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK12-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: -// CHECK12-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) -// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK12-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK12-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) +// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK12-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK12-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK12: .omp.lastprivate.then: -// CHECK12-NEXT: [[TMP28:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* -// CHECK12-NEXT: [[TMP29:%.*]] = bitcast [10 x i32]* [[C4]] to i8* -// CHECK12-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i32 40, i1 false) +// CHECK12-NEXT: [[TMP30:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* +// CHECK12-NEXT: [[TMP31:%.*]] = bitcast [10 x i32]* [[C4]] to i8* +// CHECK12-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP30]], i8* align 4 [[TMP31]], i32 40, i1 false) // CHECK12-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK12: .omp.lastprivate.done: // CHECK12-NEXT: br label [[OMP_PRECOND_END]]