diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp --- a/clang/lib/AST/StmtOpenMP.cpp +++ b/clang/lib/AST/StmtOpenMP.cpp @@ -2374,6 +2374,10 @@ Dir->setNextLowerBound(Exprs.NLB); Dir->setNextUpperBound(Exprs.NUB); Dir->setNumIterations(Exprs.NumIterations); + Dir->setPrevLowerBoundVariable(Exprs.PrevLB); + Dir->setPrevUpperBoundVariable(Exprs.PrevUB); + Dir->setDistInc(Exprs.DistInc); + Dir->setPrevEnsureUpperBound(Exprs.PrevEUB); Dir->setCounters(Exprs.Counters); Dir->setPrivateCounters(Exprs.PrivateCounters); Dir->setInits(Exprs.Inits); @@ -2383,6 +2387,15 @@ Dir->setDependentInits(Exprs.DependentInits); Dir->setFinalsConditions(Exprs.FinalsConditions); Dir->setPreInits(Exprs.PreInits); + Dir->setCombinedLowerBoundVariable(Exprs.DistCombinedFields.LB); + Dir->setCombinedUpperBoundVariable(Exprs.DistCombinedFields.UB); + Dir->setCombinedEnsureUpperBound(Exprs.DistCombinedFields.EUB); + Dir->setCombinedInit(Exprs.DistCombinedFields.Init); + Dir->setCombinedCond(Exprs.DistCombinedFields.Cond); + Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB); + Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB); + Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond); + Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond); return Dir; } @@ -2418,6 +2431,10 @@ Dir->setNextLowerBound(Exprs.NLB); Dir->setNextUpperBound(Exprs.NUB); Dir->setNumIterations(Exprs.NumIterations); + Dir->setPrevLowerBoundVariable(Exprs.PrevLB); + Dir->setPrevUpperBoundVariable(Exprs.PrevUB); + Dir->setDistInc(Exprs.DistInc); + Dir->setPrevEnsureUpperBound(Exprs.PrevEUB); Dir->setCounters(Exprs.Counters); Dir->setPrivateCounters(Exprs.PrivateCounters); Dir->setInits(Exprs.Inits); @@ -2427,6 +2444,15 @@ Dir->setDependentInits(Exprs.DependentInits); Dir->setFinalsConditions(Exprs.FinalsConditions); Dir->setPreInits(Exprs.PreInits); + Dir->setCombinedLowerBoundVariable(Exprs.DistCombinedFields.LB); + 
Dir->setCombinedUpperBoundVariable(Exprs.DistCombinedFields.UB); + Dir->setCombinedEnsureUpperBound(Exprs.DistCombinedFields.EUB); + Dir->setCombinedInit(Exprs.DistCombinedFields.Init); + Dir->setCombinedCond(Exprs.DistCombinedFields.Cond); + Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB); + Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB); + Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond); + Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond); return Dir; } diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -603,7 +603,9 @@ DKind == OMPD_teams_distribute_parallel_for_simd || DKind == OMPD_teams_distribute_parallel_for || DKind == OMPD_target_teams_distribute_parallel_for || - DKind == OMPD_target_teams_distribute_parallel_for_simd; + DKind == OMPD_target_teams_distribute_parallel_for_simd || + DKind == OMPD_parallel_loop || DKind == OMPD_teams_loop || + DKind == OMPD_target_parallel_loop || DKind == OMPD_target_teams_loop; } bool clang::isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind) { @@ -632,7 +634,8 @@ DKind == OMPD_parallel_master_taskloop_simd || DKind == OMPD_parallel_masked_taskloop || DKind == OMPD_parallel_masked_taskloop_simd || - DKind == OMPD_parallel_loop || DKind == OMPD_target_parallel_loop; + DKind == OMPD_parallel_loop || DKind == OMPD_target_parallel_loop || + DKind == OMPD_teams_loop; } bool clang::isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind) { @@ -729,7 +732,8 @@ Kind == OMPD_teams_distribute_parallel_for_simd || Kind == OMPD_teams_distribute_parallel_for || Kind == OMPD_target_teams_distribute_parallel_for || - Kind == OMPD_target_teams_distribute_parallel_for_simd; + Kind == OMPD_target_teams_distribute_parallel_for_simd || + Kind == OMPD_teams_loop || Kind == OMPD_target_teams_loop; } bool clang::isOpenMPLoopTransformationDirective(OpenMPDirectiveKind 
DKind) { @@ -766,7 +770,6 @@ case OMPD_target_teams: case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_simd: - case OMPD_target_teams_loop: CaptureRegions.push_back(OMPD_task); CaptureRegions.push_back(OMPD_target); CaptureRegions.push_back(OMPD_teams); @@ -781,6 +784,7 @@ CaptureRegions.push_back(OMPD_task); CaptureRegions.push_back(OMPD_target); break; + case OMPD_teams_loop: case OMPD_teams_distribute_parallel_for: case OMPD_teams_distribute_parallel_for_simd: CaptureRegions.push_back(OMPD_teams); @@ -815,6 +819,7 @@ CaptureRegions.push_back(OMPD_parallel); CaptureRegions.push_back(OMPD_taskloop); break; + case OMPD_target_teams_loop: case OMPD_target_teams_distribute_parallel_for: case OMPD_target_teams_distribute_parallel_for_simd: CaptureRegions.push_back(OMPD_task); @@ -822,9 +827,6 @@ CaptureRegions.push_back(OMPD_teams); CaptureRegions.push_back(OMPD_parallel); break; - case OMPD_teams_loop: - CaptureRegions.push_back(OMPD_teams); - break; case OMPD_nothing: CaptureRegions.push_back(OMPD_nothing); break; diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -2812,7 +2812,7 @@ const StaticRTInput &Values) { OpenMPSchedType ScheduleNum = getRuntimeSchedule( ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); - assert(isOpenMPWorksharingDirective(DKind) && + assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) && "Expected loop-based or sections-based directive."); llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, isOpenMPLoopDirective(DKind) @@ -6206,6 +6206,7 @@ DefaultVal = -1; return nullptr; } + case OMPD_target_teams_loop: case OMPD_target_teams: case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_simd: @@ -6225,12 +6226,14 @@ case OMPD_target_parallel: case OMPD_target_parallel_for: case OMPD_target_parallel_for_simd: + case OMPD_target_parallel_loop: 
case OMPD_target_simd: DefaultVal = 1; return nullptr; case OMPD_parallel: case OMPD_for: case OMPD_parallel_for: + case OMPD_parallel_loop: case OMPD_parallel_master: case OMPD_parallel_sections: case OMPD_for_simd: @@ -6447,6 +6450,8 @@ return ThreadLimit; } return nullptr; + case OMPD_target_teams_loop: + case OMPD_target_parallel_loop: case OMPD_target_parallel: case OMPD_target_parallel_for: case OMPD_target_parallel_for_simd: @@ -6649,6 +6654,8 @@ getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal)) return NumThreads; return Bld.getInt32(0); + case OMPD_target_teams_loop: + case OMPD_target_parallel_loop: case OMPD_target_parallel: case OMPD_target_parallel_for: case OMPD_target_parallel_for_simd: @@ -9072,7 +9079,8 @@ OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); switch (D.getDirectiveKind()) { case OMPD_target: - if (isOpenMPDistributeDirective(DKind)) + // For now, just treat 'target teams loop' as if it's distributed. + if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop) return NestedDir; if (DKind == OMPD_teams) { Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( @@ -9556,7 +9564,8 @@ OpenMPDirectiveKind Kind = D.getDirectiveKind(); const OMPExecutableDirective *TD = &D; // Get nested teams distribute kind directive, if any. 
- if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) + if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) && + Kind != OMPD_target_teams_loop) TD = getNestedDistributeDirective(CGM.getContext(), D); if (!TD) return llvm::ConstantInt::get(CGF.Int64Ty, 0); @@ -9945,6 +9954,14 @@ CGM, ParentName, cast(E)); break; + case OMPD_target_teams_loop: + CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction( + CGM, ParentName, cast(E)); + break; + case OMPD_target_parallel_loop: + CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction( + CGM, ParentName, cast(E)); + break; case OMPD_parallel: case OMPD_for: case OMPD_parallel_for: diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -653,6 +653,8 @@ case OMPD_target: case OMPD_target_teams: return hasNestedSPMDDirective(Ctx, D); + case OMPD_target_teams_loop: + case OMPD_target_parallel_loop: case OMPD_target_parallel: case OMPD_target_parallel_for: case OMPD_target_parallel_for_simd: diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -416,16 +416,19 @@ EmitOMPGenericLoopDirective(cast(*S)); break; case Stmt::OMPTeamsGenericLoopDirectiveClass: - llvm_unreachable("teams loop directive not supported yet."); + EmitOMPTeamsGenericLoopDirective(cast(*S)); break; case Stmt::OMPTargetTeamsGenericLoopDirectiveClass: - llvm_unreachable("target teams loop directive not supported yet."); + EmitOMPTargetTeamsGenericLoopDirective( + cast(*S)); break; case Stmt::OMPParallelGenericLoopDirectiveClass: - llvm_unreachable("parallel loop directive not supported yet."); + EmitOMPParallelGenericLoopDirective( + cast(*S)); break; case Stmt::OMPTargetParallelGenericLoopDirectiveClass: - llvm_unreachable("target parallel loop directive not supported yet."); + 
EmitOMPTargetParallelGenericLoopDirective( + cast(*S)); break; case Stmt::OMPParallelMaskedDirectiveClass: EmitOMPParallelMaskedDirective(cast(*S)); diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -7852,6 +7852,148 @@ CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_loop, CodeGen); } +void CodeGenFunction::EmitOMPParallelGenericLoopDirective( + const OMPLoopDirective &S) { + // Emit combined directive as if its constituent constructs are 'parallel' + // and 'for'. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + emitOMPCopyinClause(CGF, S); + (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false); + }; + { + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); + emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, + emitEmptyBoundParameters); + } + // Check for outer lastprivate conditional update. + checkForLastprivateConditionalUpdate(*this, S); +} + +void CodeGenFunction::EmitOMPTeamsGenericLoopDirective( + const OMPTeamsGenericLoopDirective &S) { + // To be consistent with current behavior of 'target teams loop', emit + // 'teams loop' as if its constituent constructs are 'distribute', + // 'parallel', and 'for'. + auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, + S.getDistInc()); + }; + + // Emit teams region as a standalone region. 
+ auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, + PrePostActionTy &Action) { + Action.Enter(CGF); + OMPPrivateScope PrivateScope(CGF); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, + CodeGenDistribute); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); + }; + emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen); + emitPostUpdateForReductionClause(*this, S, + [](CodeGenFunction &) { return nullptr; }); +} + +static void +emitTargetTeamsGenericLoopRegion(CodeGenFunction &CGF, + const OMPTargetTeamsGenericLoopDirective &S, + PrePostActionTy &Action) { + Action.Enter(CGF); + // Emit 'teams loop' as if its constituent constructs are 'distribute', + // 'parallel', and 'for'. + auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, + S.getDistInc()); + }; + + // Emit teams region as a standalone region. + auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF, + PrePostActionTy &Action) { + Action.Enter(CGF); + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + CGF.CGM.getOpenMPRuntime().emitInlinedDirective( + CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); + }; + + emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for, + CodeGenTeams); + emitPostUpdateForReductionClause(CGF, S, + [](CodeGenFunction &) { return nullptr; }); +} + +/// Emit combined directive 'target teams loop' as if its constituent +/// constructs are 'target', 'teams', 'distribute', 'parallel', and 'for'. 
+void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDirective( + const OMPTargetTeamsGenericLoopDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetTeamsGenericLoopRegion(CGF, S, Action); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); +} + +void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetTeamsGenericLoopDirective &S) { + // Emit SPMD target teams loop region as a standalone region. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetTeamsGenericLoopRegion(CGF, S, Action); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && + "Target device function emission failed for 'target teams loop'."); +} + +static void emitTargetParallelGenericLoopRegion( + CodeGenFunction &CGF, const OMPTargetParallelGenericLoopDirective &S, + PrePostActionTy &Action) { + Action.Enter(CGF); + // Emit as 'parallel for'. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + CodeGenFunction::OMPCancelStackRAII CancelRegion( + CGF, OMPD_target_parallel_loop, /*hasCancel=*/false); + CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, + emitDispatchForLoopBounds); + }; + emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen, + emitEmptyBoundParameters); +} + +void CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetParallelGenericLoopDirective &S) { + // Emit target parallel loop region as a standalone region. 
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetParallelGenericLoopRegion(CGF, S, Action); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && "Target device function emission failed."); +} + +/// Emit combined directive 'target parallel loop' as if its constituent +/// constructs are 'target', 'parallel', and 'for'. +void CodeGenFunction::EmitOMPTargetParallelGenericLoopDirective( + const OMPTargetParallelGenericLoopDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetParallelGenericLoopRegion(CGF, S, Action); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); +} + void CodeGenFunction::EmitSimpleOMPExecutableDirective( const OMPExecutableDirective &D) { if (const auto *SD = dyn_cast(&D)) { diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3580,6 +3580,12 @@ void EmitOMPTargetTeamsDistributeSimdDirective( const OMPTargetTeamsDistributeSimdDirective &S); void EmitOMPGenericLoopDirective(const OMPGenericLoopDirective &S); + void EmitOMPParallelGenericLoopDirective(const OMPLoopDirective &S); + void EmitOMPTargetParallelGenericLoopDirective( + const OMPTargetParallelGenericLoopDirective &S); + void EmitOMPTargetTeamsGenericLoopDirective( + const OMPTargetTeamsGenericLoopDirective &S); + void EmitOMPTeamsGenericLoopDirective(const OMPTeamsGenericLoopDirective &S); void EmitOMPInteropDirective(const OMPInteropDirective &S); void EmitOMPParallelMaskedDirective(const OMPParallelMaskedDirective &S); @@ -3620,6 +3626,16 @@ CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForSimdDirective &S); + /// Emit device code for the target teams loop 
directive. + static void EmitOMPTargetTeamsGenericLoopDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetTeamsGenericLoopDirective &S); + + /// Emit device code for the target parallel loop directive. + static void EmitOMPTargetParallelGenericLoopDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetParallelGenericLoopDirective &S); + static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction( CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForDirective &S); diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -4199,7 +4199,6 @@ case OMPD_target_parallel: case OMPD_target_parallel_for: case OMPD_target_parallel_for_simd: - case OMPD_target_teams_loop: case OMPD_target_parallel_loop: case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_simd: { @@ -4448,6 +4447,7 @@ Params); break; } + case OMPD_target_teams_loop: case OMPD_target_teams_distribute_parallel_for: case OMPD_target_teams_distribute_parallel_for_simd: { QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1).withConst(); @@ -4506,22 +4506,7 @@ break; } - case OMPD_teams_loop: { - QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1).withConst(); - QualType KmpInt32PtrTy = - Context.getPointerType(KmpInt32Ty).withConst().withRestrict(); - - Sema::CapturedParamNameType ParamsTeams[] = { - std::make_pair(".global_tid.", KmpInt32PtrTy), - std::make_pair(".bound_tid.", KmpInt32PtrTy), - std::make_pair(StringRef(), QualType()) // __context with shared vars - }; - // Start a captured region for 'teams'. 
- ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, - ParamsTeams, /*OpenMPCaptureLevel=*/0); - break; - } - + case OMPD_teams_loop: case OMPD_teams_distribute_parallel_for: case OMPD_teams_distribute_parallel_for_simd: { QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1).withConst(); @@ -15381,6 +15366,7 @@ break; } [[fallthrough]]; + case OMPD_target_teams_loop: case OMPD_target_teams_distribute_parallel_for: // If this clause applies to the nested 'parallel' region, capture within // the 'teams' region, otherwise do not capture. @@ -15473,7 +15459,6 @@ case OMPD_target: case OMPD_target_teams: case OMPD_target_teams_distribute: - case OMPD_target_teams_loop: case OMPD_distribute_parallel_for: case OMPD_task: case OMPD_taskloop: diff --git a/clang/test/OpenMP/generic_loop_codegen.cpp b/clang/test/OpenMP/generic_loop_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/generic_loop_codegen.cpp @@ -0,0 +1,117 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp %s +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=IR + +// Check same results after serialization round-trip +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-pch -o %t %s +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR-PCH + +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +void foo(int t) { + + int i, j, z; + #pragma omp loop collapse(2) reduction(+:z) lastprivate(j) bind(thread) + for (int i = 0; i +tx ftemplate(int n) { + tx a[N]; + short aa[N]; + tx b[10]; + tx c[M][M]; + tx f = n; + tx l; + int k; + tx *v; + 
+#pragma omp target teams loop map(tofrom: aa) num_teams(M) thread_limit(64) + for(int i = 0; i < n; i++) { + aa[i] += 1; + } + +#pragma omp target teams loop map(tofrom:a, aa, b) if(target: n>40) + for(int i = 0; i < 10; i++) { + b[i] += 1; + } + +#pragma omp target teams loop collapse(2) firstprivate(f) private(k) + for(int i = 0; i < M; i++) { + for(int j = 0; j < M; j++) { + k = M; + c[i][j] = i + j * f + k; + } + } + +#pragma omp target teams loop collapse(2) + for(int i = 0; i < n; i++) { + for(int j = 0; j < n; j++) { + c[i][j] = i + j; + } + } + +#pragma omp target teams loop map(a, v[:N]) + for(int i = 0; i < n; i++) + a[i] = v[i]; + return a[0]; +} + +int bar(int n){ + int a = 0; + + a += ftemplate(n); + + return a; +} + +#endif +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28 +// CHECK1-SAME: (i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK1: user_code.entry: +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 +// CHECK1-NEXT: 
[[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK1-NEXT: ret void +// CHECK1: worker.exit: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], 
align 8 +// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK1: omp.precond.then: +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: br i1 [[CMP4]], 
label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP15]] to ptr +// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP23:%.*]] = inttoptr 
i64 [[TMP17]] to ptr +// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP19]] to ptr +// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 
[[TMP35]], [[TMP36]] +// CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] +// CHECK1: cond.true10: +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: br label [[COND_END12:%.*]] +// CHECK1: cond.false11: +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END12]] +// CHECK1: cond.end12: +// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] +// CHECK1-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP39]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3:[0-9]+]], i32 [[TMP41]]) +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.end: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined_omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// 
CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK1: omp.precond.then: +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, 
ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr 
inbounds [1000 x i16], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 +// CHECK1-NEXT: [[CONV7:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[CONV7]], 1 +// CHECK1-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD8]] to i16 +// CHECK1-NEXT: store i16 [[CONV9]], ptr [[ARRAYIDX]], align 2 +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP18]]) +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.end: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33 +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// 
CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK1: user_code.entry: +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK1-NEXT: ret void +// CHECK1: worker.exit: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], 
align 4 +// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to 
i64 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to ptr +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to ptr +// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 +// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label 
[[COND_FALSE6:%.*]] +// CHECK1: cond.true5: +// CHECK1-NEXT: br label [[COND_END7:%.*]] +// CHECK1: cond.false6: +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END7]] +// CHECK1: cond.end7: +// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] +// CHECK1-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined_omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP7]] +// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: 
omp.inner.for.body: +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38 +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR4]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[F]], 
ptr [[F_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK1: user_code.entry: +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK1-NEXT: ret void +// CHECK1: worker.exit: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, 
align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] 
] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[F_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[F_CASTED]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[F_CASTED]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP8]] to ptr +// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP10]] to ptr +// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 +// CHECK1-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP12]] to ptr +// CHECK1-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99 +// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK1: cond.true6: +// CHECK1-NEXT: br label [[COND_END8:%.*]] +// CHECK1: cond.false7: +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END8]] +// CHECK1: cond.end8: +// CHECK1-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ] +// CHECK1-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_omp_outlined_omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr 
[[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV3]], [[TMP7]] +// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// 
CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL5]] +// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[J]], align 4 +// CHECK1-NEXT: store i32 10, ptr [[K]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[F_ADDR]], align 4 +// CHECK1-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP11]], [[MUL8]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[K]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP14]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP16]] to i64 +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] +// CHECK1-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX12]], align 4 +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call 
void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46 +// CHECK1-SAME: (i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR4]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK1: user_code.entry: +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK1-NEXT: ret void +// CHECK1: worker.exit: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_omp_outlined +// CHECK1-SAME: (ptr noalias 
noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J10:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// 
CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK1-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK1-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK1-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK1: land.lhs.true: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.then: +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK1-NEXT: [[CONV11:%.*]] = zext i32 [[NVPTX_NUM_THREADS]] to i64 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_8(ptr @[[GLOB2]], i32 
[[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 [[CONV11]]) +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sgt i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP16]], 1 +// CHECK1-NEXT: [[CMP13:%.*]] = icmp slt i64 [[TMP15]], [[ADD]] +// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 
+// CHECK1-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP17]] to ptr +// CHECK1-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP18]] to ptr +// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP20]] to ptr +// CHECK1-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP29]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP30]], [[TMP31]] +// CHECK1-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP32]], [[TMP33]] +// CHECK1-NEXT: store i64 [[ADD15]], ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP34]], 
[[TMP35]] +// CHECK1-NEXT: store i64 [[ADD16]], ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: [[CMP17:%.*]] = icmp sgt i64 [[TMP36]], [[TMP37]] +// CHECK1-NEXT: br i1 [[CMP17]], label [[COND_TRUE18:%.*]], label [[COND_FALSE19:%.*]] +// CHECK1: cond.true18: +// CHECK1-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: br label [[COND_END20:%.*]] +// CHECK1: cond.false19: +// CHECK1-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: br label [[COND_END20]] +// CHECK1: cond.end20: +// CHECK1-NEXT: [[COND21:%.*]] = phi i64 [ [[TMP38]], [[COND_TRUE18]] ], [ [[TMP39]], [[COND_FALSE19]] ] +// CHECK1-NEXT: store i64 [[COND21]], ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP40]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: [[TMP41:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP41]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP42]]) +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.end: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_omp_outlined_omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 
8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J10:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: 
[[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK1-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK1-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK1-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK1: land.lhs.true: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.then: +// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr 
@[[GLOB3]], i32 [[TMP11]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CMP11:%.*]] = icmp ule i64 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP16]], 0 +// CHECK1-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 +// CHECK1-NEXT: [[MUL14:%.*]] = mul nsw i32 1, [[DIV13]] +// CHECK1-NEXT: [[CONV15:%.*]] = sext i32 [[MUL14]] to i64 +// CHECK1-NEXT: [[DIV16:%.*]] = sdiv i64 [[TMP15]], [[CONV15]] +// CHECK1-NEXT: [[MUL17:%.*]] = mul nsw i64 [[DIV16]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL17]] +// CHECK1-NEXT: [[CONV18:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK1-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP19]], 0 +// CHECK1-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 +// CHECK1-NEXT: [[MUL21:%.*]] = mul nsw i32 1, [[DIV20]] +// CHECK1-NEXT: [[CONV22:%.*]] = sext i32 [[MUL21]] to i64 +// CHECK1-NEXT: [[DIV23:%.*]] = sdiv i64 [[TMP18]], [[CONV22]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: 
[[SUB24:%.*]] = sub nsw i32 [[TMP20]], 0 +// CHECK1-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 +// CHECK1-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] +// CHECK1-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 +// CHECK1-NEXT: [[MUL28:%.*]] = mul nsw i64 [[DIV23]], [[CONV27]] +// CHECK1-NEXT: [[SUB29:%.*]] = sub nsw i64 [[TMP17]], [[MUL28]] +// CHECK1-NEXT: [[MUL30:%.*]] = mul nsw i64 [[SUB29]], 1 +// CHECK1-NEXT: [[ADD31:%.*]] = add nsw i64 0, [[MUL30]] +// CHECK1-NEXT: [[CONV32:%.*]] = trunc i64 [[ADD31]] to i32 +// CHECK1-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I9]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[J10]], align 4 +// CHECK1-NEXT: [[ADD33:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I9]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[J10]], align 4 +// CHECK1-NEXT: [[IDXPROM34:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM34]] +// CHECK1-NEXT: store i32 [[ADD33]], ptr [[ARRAYIDX35]], align 4 +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: [[ADD36:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: store i64 [[ADD36]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 
8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP28]]) +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.end: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53 +// CHECK1-SAME: (i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR4]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK1: user_code.entry: +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[V_ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_omp_outlined(ptr 
[[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK1-NEXT: ret void +// CHECK1: worker.exit: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 8 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load 
ptr, ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK1: omp.precond.then: +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: br 
label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[V_ADDR]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP15]] to ptr +// CHECK1-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP17]] to ptr +// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: 
[[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP19]] to ptr +// CHECK1-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 +// CHECK1-NEXT: store ptr [[TMP20]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] +// CHECK1: cond.true10: +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: br label [[COND_END12:%.*]] +// CHECK1: cond.false11: +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END12]] +// CHECK1: cond.end12: +// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE10]] ], [ [[TMP40]], [[COND_FALSE11]] ] +// CHECK1-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP41]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP43]]) +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.end: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_omp_outlined_omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: 
[[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// 
CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK1: omp.precond.then: +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// 
CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[V_ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP16]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM7]] +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX8]], align 4 +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP20]]) +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.end: +// CHECK1-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28 +// CHECK2-SAME: (i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// 
CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK2: user_code.entry: +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2:[0-9]+]] +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK2-NEXT: ret void +// CHECK2: worker.exit: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[AA_ADDR:%.*]] = 
alloca ptr, align 8 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK2: omp.precond.then: +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 
[[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP15]] to ptr +// CHECK2-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP17]] to ptr +// CHECK2-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK2-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP19]] to ptr +// CHECK2-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4) +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr 
[[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK2-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] +// CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] +// CHECK2: cond.true10: +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: br label [[COND_END12:%.*]] +// CHECK2: cond.false11: +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END12]] +// CHECK2: cond.end12: +// CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] +// CHECK2-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP39]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 +// CHECK2-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB3:[0-9]+]], i32 [[TMP41]]) +// CHECK2-NEXT: br label [[OMP_PRECOND_END]] +// CHECK2: omp.precond.end: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined_omp_outlined +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], 
align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK2: omp.precond.then: +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK2-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP11]] +// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 +// CHECK2-NEXT: [[CONV7:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[CONV7]], 1 +// CHECK2-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD8]] to i16 +// CHECK2-NEXT: store i16 [[CONV9]], ptr [[ARRAYIDX]], align 2 +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2: omp.body.continue: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK2-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] 
= load i32, ptr [[TMP17]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP18]]) +// CHECK2-NEXT: br label [[OMP_PRECOND_END]] +// CHECK2: omp.precond.end: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33 +// CHECK2-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK2: user_code.entry: +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]] +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK2-NEXT: ret void +// CHECK2: worker.exit: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8 +// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END]] 
+// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to ptr +// CHECK2-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK2-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to ptr +// CHECK2-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP15]], align 8 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, 
ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK2-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 +// CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK2: cond.true5: +// CHECK2-NEXT: br label [[COND_END7:%.*]] +// CHECK2: cond.false6: +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END7]] +// CHECK2: cond.end7: +// CHECK2-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] +// CHECK2-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined_omp_outlined +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: 
[[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CONV2:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP7]] +// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2: omp.body.continue: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK2-NEXT: store i32 [[ADD4]], 
ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]]) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38 +// CHECK2-SAME: (ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR4]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK2-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK2: user_code.entry: +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED]], align 8 +// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]] +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK2-NEXT: ret void +// 
CHECK2: worker.exit: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_omp_outlined +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK2-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 
@__kmpc_get_hardware_num_threads_in_block() +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 +// CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[F_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[F_CASTED]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i64, ptr [[F_CASTED]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = 
getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP8]] to ptr +// CHECK2-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK2-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP10]] to ptr +// CHECK2-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP17]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 +// CHECK2-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP12]] to ptr +// CHECK2-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4) +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK2-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK2-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB]], align 4 +// 
CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99 +// CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK2: cond.true6: +// CHECK2-NEXT: br label [[COND_END8:%.*]] +// CHECK2: cond.false7: +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END8]] +// CHECK2: cond.end8: +// CHECK2-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ] +// CHECK2-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_omp_outlined_omp_outlined +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// 
CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK2-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV3]], [[TMP7]] +// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK2-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL5]] +// CHECK2-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] +// CHECK2-NEXT: store i32 [[ADD7]], ptr [[J]], align 4 +// CHECK2-NEXT: store i32 10, ptr [[K]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[F_ADDR]], align 4 +// CHECK2-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP11]], [[MUL8]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[K]], align 4 +// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP14]] +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i64 0, i64 
[[IDXPROM]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP16]] to i64 +// CHECK2-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] +// CHECK2-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX12]], align 4 +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2: omp.body.continue: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]]) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46 +// CHECK2-SAME: (i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR4]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 
+// CHECK2: user_code.entry: +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]] +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK2-NEXT: ret void +// CHECK2: worker.exit: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_omp_outlined +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, 
align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I8:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J9:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK2-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], [[DIV5]] +// CHECK2-NEXT: [[SUB6:%.*]] = sub nsw i32 [[MUL]], 1 +// CHECK2-NEXT: store i32 [[SUB6]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK2-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK2: land.lhs.true: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[CMP7:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK2-NEXT: br i1 [[CMP7]], label 
[[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK2: omp.precond.then: +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: br i1 [[CMP10]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK2-NEXT: [[CMP11:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK2-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP18]] to ptr +// CHECK2-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK2-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP20]] to ptr +// CHECK2-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK2-NEXT: [[TMP28:%.*]] = inttoptr i64 [[TMP22]] to ptr +// CHECK2-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 8 +// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP29]], align 8 +// CHECK2-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_omp_outlined_omp_outlined, ptr null, ptr 
[[CAPTURED_VARS_ADDRS]], i64 4) +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK2-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK2-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK2-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[CMP15:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] +// CHECK2-NEXT: br i1 [[CMP15]], label [[COND_TRUE16:%.*]], label [[COND_FALSE17:%.*]] +// CHECK2: cond.true16: +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: br label [[COND_END18:%.*]] +// CHECK2: cond.false17: +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END18]] +// CHECK2: cond.end18: +// CHECK2-NEXT: [[COND19:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE16]] ], [ [[TMP41]], [[COND_FALSE17]] ] +// CHECK2-NEXT: store i32 [[COND19]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP42]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br 
label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP44]]) +// CHECK2-NEXT: br label [[OMP_PRECOND_END]] +// CHECK2: omp.precond.end: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_omp_outlined_omp_outlined +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J10:%.*]] = alloca i32, align 4 +// 
CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK2-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], [[DIV5]] +// CHECK2-NEXT: [[SUB6:%.*]] = sub nsw i32 [[MUL]], 1 +// CHECK2-NEXT: store i32 [[SUB6]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK2-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK2: land.lhs.true: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[CMP7:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK2: omp.precond.then: +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], 
align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK2-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[CONV8]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP11]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CONV11:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CMP12:%.*]] = icmp ule i64 [[CONV11]], [[TMP14]] +// CHECK2-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP16]], 0 +// CHECK2-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK2-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK2-NEXT: [[DIV16:%.*]] = 
sdiv i32 [[TMP15]], [[MUL15]] +// CHECK2-NEXT: [[MUL17:%.*]] = mul nsw i32 [[DIV16]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL17]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[I9]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[SUB18:%.*]] = sub nsw i32 [[TMP19]], 0 +// CHECK2-NEXT: [[DIV19:%.*]] = sdiv i32 [[SUB18]], 1 +// CHECK2-NEXT: [[MUL20:%.*]] = mul nsw i32 1, [[DIV19]] +// CHECK2-NEXT: [[DIV21:%.*]] = sdiv i32 [[TMP18]], [[MUL20]] +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP20]], 0 +// CHECK2-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 +// CHECK2-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] +// CHECK2-NEXT: [[MUL25:%.*]] = mul nsw i32 [[DIV21]], [[MUL24]] +// CHECK2-NEXT: [[SUB26:%.*]] = sub nsw i32 [[TMP17]], [[MUL25]] +// CHECK2-NEXT: [[MUL27:%.*]] = mul nsw i32 [[SUB26]], 1 +// CHECK2-NEXT: [[ADD28:%.*]] = add nsw i32 0, [[MUL27]] +// CHECK2-NEXT: store i32 [[ADD28]], ptr [[J10]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[I9]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[J10]], align 4 +// CHECK2-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[I9]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[J10]], align 4 +// CHECK2-NEXT: [[IDXPROM30:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK2-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM30]] +// CHECK2-NEXT: store i32 [[ADD29]], ptr [[ARRAYIDX31]], align 4 +// CHECK2-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] +// CHECK2: omp.body.continue: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD32:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK2-NEXT: store i32 [[ADD32]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP28]]) +// CHECK2-NEXT: br label [[OMP_PRECOND_END]] +// CHECK2: omp.precond.end: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53 +// CHECK2-SAME: (i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR4]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label 
[[WORKER_EXIT:%.*]] +// CHECK2: user_code.entry: +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[V_ADDR]], align 8 +// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR2]] +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK2-NEXT: ret void +// CHECK2: worker.exit: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_omp_outlined +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = 
alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 8 +// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK2: omp.precond.then: +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// 
CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP18]], ptr 
[[N_CASTED]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[V_ADDR]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP15]] to ptr +// CHECK2-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK2-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP17]] to ptr +// CHECK2-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK2-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP19]] to ptr +// CHECK2-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP27]], align 8 +// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 +// CHECK2-NEXT: store ptr [[TMP20]], ptr [[TMP28]], align 8 +// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5) +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]] +// CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] +// CHECK2: cond.true10: +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: br label [[COND_END12:%.*]] +// CHECK2: cond.false11: +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END12]] +// CHECK2: cond.end12: +// CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE10]] ], [ [[TMP40]], [[COND_FALSE11]] ] +// CHECK2-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP41]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP43]]) +// CHECK2-NEXT: br label [[OMP_PRECOND_END]] +// CHECK2: omp.precond.end: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_omp_outlined_omp_outlined +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], 
align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK2: omp.precond.then: +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK2-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP11]] +// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[V_ADDR]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK2-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP16]] to i64 +// CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM7]] +// CHECK2-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX8]], align 4 +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2: omp.body.continue: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP20]]) +// CHECK2-NEXT: br label [[OMP_PRECOND_END]] +// CHECK2: omp.precond.end: +// CHECK2-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28 +// CHECK3-SAME: (i32 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK3: user_code.entry: +// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2:[0-9]+]] +// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// 
CHECK3-NEXT: ret void +// CHECK3: worker.exit: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 
+// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK3: omp.precond.then: +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// 
CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to ptr +// CHECK3-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to ptr +// CHECK3-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to ptr +// CHECK3-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// 
CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP26]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4) +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]] +// CHECK3-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] +// CHECK3: cond.true10: +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: br label [[COND_END12:%.*]] +// CHECK3: cond.false11: +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END12]] +// CHECK3: cond.end12: +// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP35]], [[COND_TRUE10]] ], [ [[TMP36]], [[COND_FALSE11]] ] +// CHECK3-NEXT: store i32 [[COND13]], ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP37]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3:[0-9]+]], i32 [[TMP39]]) +// CHECK3-NEXT: br label [[OMP_PRECOND_END]] +// CHECK3: omp.precond.end: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined_omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// 
CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK3: omp.precond.then: +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i32 0, i32 [[TMP13]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK3-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 +// CHECK3-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX]], align 2 +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK3: omp.body.continue: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br 
label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP18]]) +// CHECK3-NEXT: br label [[OMP_PRECOND_END]] +// CHECK3: omp.precond.end: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33 +// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK3: user_code.entry: +// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK3-NEXT: ret void +// CHECK3: worker.exit: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined +// CHECK3-SAME: (ptr noalias noundef 
[[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK3-NEXT: br i1 [[CMP]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to ptr +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to ptr +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: 
omp.inner.for.inc: +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9 +// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK3: cond.true5: +// CHECK3-NEXT: br label [[COND_END7:%.*]] +// CHECK3: cond.false6: +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END7]] +// CHECK3: cond.end7: +// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ] +// CHECK3-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined_omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: store i32 1, ptr 
[[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK3: omp.body.continue: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]]) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38 +// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[F:%.*]]) #[[ATTR4]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK3: user_code.entry: +// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK3-NEXT: ret void +// 
CHECK3: worker.exit: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[F:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 
@__kmpc_get_hardware_num_threads_in_block() +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 +// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[F_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[F_CASTED]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[F_CASTED]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP12:%.*]] = inttoptr i32 
[[TMP7]] to ptr +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to ptr +// CHECK3-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP17:%.*]] = inttoptr i32 [[TMP10]] to ptr +// CHECK3-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4) +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 
[[TMP24]], 99 +// CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK3: cond.true6: +// CHECK3-NEXT: br label [[COND_END8:%.*]] +// CHECK3: cond.false7: +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END8]] +// CHECK3: cond.end8: +// CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP25]], [[COND_FALSE7]] ] +// CHECK3-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_omp_outlined_omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[F:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: 
[[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 
+// CHECK3-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK3-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 10 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL3]] +// CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] +// CHECK3-NEXT: store i32 [[ADD5]], ptr [[J]], align 4 +// CHECK3-NEXT: store i32 10, ptr [[K]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[F_ADDR]], align 4 +// CHECK3-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[MUL6]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[K]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP14]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP15]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4 +// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: store i32 [[ADD8]], ptr [[ARRAYIDX9]], align 4 +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK3: omp.body.continue: +// CHECK3-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]]) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46 +// CHECK3-SAME: (i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR4]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK3: user_code.entry: +// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr 
[[DOTTHREADID_TEMP_]], align 4 +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK3-NEXT: ret void +// CHECK3: worker.exit: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[J10:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr 
[[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK3-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK3-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK3-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK3: land.lhs.true: +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK3: omp.precond.then: +// CHECK3-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 8 +// 
CHECK3-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK3-NEXT: [[CONV11:%.*]] = zext i32 [[NVPTX_NUM_THREADS]] to i64 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_8(ptr @[[GLOB2]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 [[CONV11]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: [[CMP12:%.*]] = icmp sgt i64 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: br i1 [[CMP12]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP14]], ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP16]], 1 +// CHECK3-NEXT: [[CMP13:%.*]] = icmp slt i64 [[TMP15]], [[ADD]] +// CHECK3-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK3-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP18]] to ptr +// CHECK3-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP26:%.*]] = inttoptr i32 [[TMP20]] to ptr +// CHECK3-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP28:%.*]] = inttoptr i32 [[TMP22]] to ptr +// CHECK3-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP29]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4) +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP33:%.*]] = load i64, ptr 
[[DOTOMP_STRIDE]], align 8 +// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP32]], [[TMP33]] +// CHECK3-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK3-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK3-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP34]], [[TMP35]] +// CHECK3-NEXT: store i64 [[ADD15]], ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK3-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP36]], [[TMP37]] +// CHECK3-NEXT: store i64 [[ADD16]], ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: [[CMP17:%.*]] = icmp sgt i64 [[TMP38]], [[TMP39]] +// CHECK3-NEXT: br i1 [[CMP17]], label [[COND_TRUE18:%.*]], label [[COND_FALSE19:%.*]] +// CHECK3: cond.true18: +// CHECK3-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: br label [[COND_END20:%.*]] +// CHECK3: cond.false19: +// CHECK3-NEXT: [[TMP41:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: br label [[COND_END20]] +// CHECK3: cond.end20: +// CHECK3-NEXT: [[COND21:%.*]] = phi i64 [ [[TMP40]], [[COND_TRUE18]] ], [ [[TMP41]], [[COND_FALSE19]] ] +// CHECK3-NEXT: store i64 [[COND21]], ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP42]], ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK3-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP44]]) +// CHECK3-NEXT: br label [[OMP_PRECOND_END]] +// CHECK3: omp.precond.end: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_omp_outlined_omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I11:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr 
[[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK3-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK3-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK3-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK3: land.lhs.true: +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK3: omp.precond.then: +// CHECK3-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// 
CHECK3-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: [[CONV9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[CONV10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK3-NEXT: store i64 [[CONV9]], ptr [[DOTOMP_LB]], align 8 +// CHECK3-NEXT: store i64 [[CONV10]], ptr [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3]], i32 [[TMP11]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[CONV13:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK3-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP13]], [[CONV13]] +// CHECK3-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP16]], 0 +// CHECK3-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK3-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] +// CHECK3-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 +// CHECK3-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP15]], 
[[CONV18]] +// CHECK3-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] +// CHECK3-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK3-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP19]], 0 +// CHECK3-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 +// CHECK3-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] +// CHECK3-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 +// CHECK3-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP18]], [[CONV25]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP20]], 0 +// CHECK3-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 +// CHECK3-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] +// CHECK3-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 +// CHECK3-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] +// CHECK3-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP17]], [[MUL31]] +// CHECK3-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 +// CHECK3-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] +// CHECK3-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 +// CHECK3-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I11]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[J12]], align 4 +// CHECK3-NEXT: [[ADD36:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[I11]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP23]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[J12]], align 4 +// CHECK3-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP24]] +// CHECK3-NEXT: 
store i32 [[ADD36]], ptr [[ARRAYIDX37]], align 4 +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK3: omp.body.continue: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK3-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] +// CHECK3-NEXT: store i64 [[ADD38]], ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP28]]) +// CHECK3-NEXT: br label [[OMP_PRECOND_END]] +// CHECK3: omp.precond.end: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53 +// CHECK3-SAME: (i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR4]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) +// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// 
CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK3: user_code.entry: +// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[V_ADDR]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK3-NEXT: ret void +// CHECK3: worker.exit: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: 
[[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK3: omp.precond.then: +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4 
+// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[V_ADDR]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP20:%.*]] = inttoptr i32 [[TMP14]] to ptr +// CHECK3-NEXT: store ptr [[TMP20]], ptr [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP22:%.*]] = inttoptr i32 [[TMP15]] to ptr +// CHECK3-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP17]] to ptr +// CHECK3-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP18]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 5) +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// 
CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] +// CHECK3-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] +// CHECK3: cond.true10: +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: br label [[COND_END12:%.*]] +// CHECK3: cond.false11: +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END12]] +// CHECK3: cond.end12: +// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] +// CHECK3-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP39]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP41]]) +// CHECK3-NEXT: br label [[OMP_PRECOND_END]] +// CHECK3: omp.precond.end: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_omp_outlined_omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], 
align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK3: omp.precond.then: +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// 
CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[V_ADDR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 [[TMP14]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX5]], align 4 +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK3: omp.body.continue: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP20]]) +// CHECK3-NEXT: br label [[OMP_PRECOND_END]] +// CHECK3: omp.precond.end: +// CHECK3-NEXT: ret void +// diff --git 
a/clang/test/OpenMP/nvptx_target_teams_generic_loop_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_generic_loop_generic_mode_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/nvptx_target_teams_generic_loop_generic_mode_codegen.cpp @@ -0,0 +1,525 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ +// Test target codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK1 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK2 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK2 + +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda 
-emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK1 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK2 +// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK2 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +int a; + +int foo(int *a); + +int main(int argc, char **argv) { +#pragma omp target teams loop map(tofrom:a) if(target:argc) + for (int i= 0; i < argc; ++i) + a = foo(&i) + foo(&a) + foo(&argc); + return 0; +} + + +#endif +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24 +// CHECK1-SAME: (i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label 
[[WORKER_EXIT:%.*]] +// CHECK1: user_code.entry: +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3:[0-9]+]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK1-NEXT: ret void +// CHECK1: worker.exit: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: 
[[ARGC_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK1: omp.precond.then: +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 
1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[ARGC_CASTED]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 
0, i64 0 +// CHECK1-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP15]] to ptr +// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP17]] to ptr +// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP19]] to ptr +// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// 
CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] +// CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] +// CHECK1: cond.true10: +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: br label [[COND_END12:%.*]] +// CHECK1: cond.false11: +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END12]] +// CHECK1: cond.end12: +// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] +// CHECK1-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP39]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3:[0-9]+]], i32 [[TMP41]]) +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.end: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined_omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = 
icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK1: omp.precond.then: +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw 
i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I4]]) #[[ATTR5:[0-9]+]] +// CHECK1-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP0]]) #[[ATTR5]] +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]] +// CHECK1-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARGC_ADDR]]) #[[ATTR5]] +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]] +// CHECK1-NEXT: store i32 [[ADD10]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP16]]) +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.end: +// CHECK1-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24 +// CHECK2-SAME: (i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, 
align 4 +// CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 +// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) +// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK2: user_code.entry: +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3:[0-9]+]] +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) +// CHECK2-NEXT: ret void +// CHECK2: worker.exit: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 
4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4 +// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 +// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK2: omp.precond.then: +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// 
CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP16]], ptr [[ARGC_CASTED]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to ptr +// CHECK2-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to ptr +// CHECK2-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to ptr +// CHECK2-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP24]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP26]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4) +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK2-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// 
CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]] +// CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] +// CHECK2: cond.true10: +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: br label [[COND_END12:%.*]] +// CHECK2: cond.false11: +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END12]] +// CHECK2: cond.end12: +// CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP35]], [[COND_TRUE10]] ], [ [[TMP36]], [[COND_FALSE11]] ] +// CHECK2-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP37]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3:[0-9]+]], i32 [[TMP39]]) +// CHECK2-NEXT: br label [[OMP_PRECOND_END]] +// CHECK2: omp.precond.end: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined_omp_outlined +// CHECK2-SAME: (ptr noalias noundef 
[[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 +// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 
[[TMP2]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK2: omp.precond.then: +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: 
omp.inner.for.body: +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 +// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I3]]) #[[ATTR5:[0-9]+]] +// CHECK2-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP0]]) #[[ATTR5]] +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[CALL]], [[CALL5]] +// CHECK2-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARGC_ADDR]]) #[[ATTR5]] +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD6]], [[CALL7]] +// CHECK2-NEXT: store i32 [[ADD8]], ptr [[TMP0]], align 4 +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2: omp.body.continue: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK2-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP16]]) +// CHECK2-NEXT: br label [[OMP_PRECOND_END]] +// CHECK2: omp.precond.end: +// CHECK2-NEXT: ret void +// diff --git a/clang/test/OpenMP/parallel_generic_loop_codegen.cpp b/clang/test/OpenMP/parallel_generic_loop_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/parallel_generic_loop_codegen.cpp @@ -0,0 +1,224 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature 
--include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp %s +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=IR + +// Check same results after serialization round-trip +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-pch -o %t %s +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR-PCH + +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +#define N 64 +int foo() { + int x = 0; + int result[N] = {0}; + + #pragma omp parallel loop num_threads(N) allocate(x) private(x) collapse(2) + for (int i = 0; i < N; i++) + for (int j = 0; j < N; j++) + result[i] = i + j + x; + return 0; +} +#endif +// IR-LABEL: define {{[^@]+}}@_Z3foov +// IR-SAME: () #[[ATTR0:[0-9]+]] { +// IR-NEXT: entry: +// IR-NEXT: [[X:%.*]] = alloca i32, align 4 +// IR-NEXT: [[RESULT:%.*]] = alloca [64 x i32], align 16 +// IR-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) +// IR-NEXT: store i32 0, ptr [[X]], align 4 +// IR-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[RESULT]], i8 0, i64 256, i1 false) +// IR-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB2]], i32 [[TMP0]], i32 64) +// IR-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @_Z3foov.omp_outlined, ptr [[RESULT]]) +// IR-NEXT: ret i32 0 +// +// +// IR-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined +// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(256) [[RESULT:%.*]]) #[[ATTR2:[0-9]+]] { +// IR-NEXT: entry: +// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[RESULT_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-NEXT: store ptr [[RESULT]], ptr [[RESULT_ADDR]], align 8 +// IR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[RESULT_ADDR]], align 8 +// IR-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// IR-NEXT: store i32 4095, ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// IR-NEXT: [[DOTX__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP2]], i64 4, ptr null) +// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-NEXT: [[TMP3:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// IR-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 4095 +// IR-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR: cond.true: +// IR-NEXT: br label [[COND_END:%.*]] +// IR: cond.false: +// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: br label [[COND_END]] +// IR: cond.end: +// IR-NEXT: [[COND:%.*]] = phi i32 [ 4095, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// IR-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR: omp.inner.for.cond: +// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// IR-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// IR: omp.inner.for.cond.cleanup: +// IR-NEXT: br label [[OMP_INNER_FOR_END:%.*]] +// IR: omp.inner.for.body: +// IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 64 +// IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// IR-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 64 +// IR-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 64 +// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// IR-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 +// IR-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] +// IR-NEXT: store i32 [[ADD6]], ptr [[J]], align 4 +// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4 +// IR-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] 
+// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTX__VOID_ADDR]], align 4 +// IR-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP13]] +// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// IR-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 +// IR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [64 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// IR-NEXT: store i32 [[ADD8]], ptr [[ARRAYIDX]], align 4 +// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR: omp.body.continue: +// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR: omp.inner.for.inc: +// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1 +// IR-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR: omp.inner.for.end: +// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR: omp.loop.exit: +// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// IR-NEXT: call void @__kmpc_free(i32 [[TMP2]], ptr [[DOTX__VOID_ADDR]], ptr null) +// IR-NEXT: ret void +// +// +// IR-PCH-LABEL: define {{[^@]+}}@_Z3foov +// IR-PCH-SAME: () #[[ATTR0:[0-9]+]] { +// IR-PCH-NEXT: entry: +// IR-PCH-NEXT: [[X:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[RESULT:%.*]] = alloca [64 x i32], align 16 +// IR-PCH-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) +// IR-PCH-NEXT: store i32 0, ptr [[X]], align 4 +// IR-PCH-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[RESULT]], i8 0, i64 256, i1 false) +// IR-PCH-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB2]], i32 [[TMP0]], i32 64) +// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @_Z3foov.omp_outlined, ptr [[RESULT]]) +// IR-PCH-NEXT: ret i32 0 +// +// +// IR-PCH-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined +// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(256) [[RESULT:%.*]]) #[[ATTR2:[0-9]+]] { +// IR-PCH-NEXT: entry: +// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[RESULT_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[RESULT]], ptr [[RESULT_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[RESULT_ADDR]], align 8 +// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// IR-PCH-NEXT: store i32 4095, ptr [[DOTOMP_UB]], align 4 +// IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// IR-PCH-NEXT: [[DOTX__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP2]], i64 4, ptr null) +// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-PCH-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-PCH-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 4095 +// IR-PCH-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR-PCH: cond.true: +// IR-PCH-NEXT: br label [[COND_END:%.*]] +// IR-PCH: cond.false: +// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-PCH-NEXT: br label [[COND_END]] +// IR-PCH: cond.end: +// IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ 4095, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// IR-PCH-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-PCH: omp.inner.for.cond: +// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-PCH-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// IR-PCH-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// IR-PCH: omp.inner.for.cond.cleanup: +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_END:%.*]] +// IR-PCH: omp.inner.for.body: +// IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 64 +// IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// IR-PCH-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 64 +// IR-PCH-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 64 +// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// IR-PCH-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 +// IR-PCH-NEXT: [[ADD6:%.*]] = add 
nsw i32 0, [[MUL5]] +// IR-PCH-NEXT: store i32 [[ADD6]], ptr [[J]], align 4 +// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4 +// IR-PCH-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTX__VOID_ADDR]], align 4 +// IR-PCH-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP13]] +// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// IR-PCH-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 +// IR-PCH-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [64 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// IR-PCH-NEXT: store i32 [[ADD8]], ptr [[ARRAYIDX]], align 4 +// IR-PCH-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR-PCH: omp.body.continue: +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR-PCH: omp.inner.for.inc: +// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1 +// IR-PCH-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR-PCH: omp.inner.for.end: +// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR-PCH: omp.loop.exit: +// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// IR-PCH-NEXT: call void @__kmpc_free(i32 [[TMP2]], ptr [[DOTX__VOID_ADDR]], ptr null) +// IR-PCH-NEXT: ret void +// diff --git a/clang/test/OpenMP/target_parallel_generic_loop_codegen-1.cpp b/clang/test/OpenMP/target_parallel_generic_loop_codegen-1.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/target_parallel_generic_loop_codegen-1.cpp @@ -0,0 +1,8390 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ +// Test host codegen. 
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu 
-emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s + +// Test target parallel for codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown 
-fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path 
%t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s + +// Check that no target code is emitted if no omptests flag was provided. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK-NTARGET + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s + +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix OMP-DEFAULT +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix OMP-DEFAULT +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix OMP-DEfAULT +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix OMP-DEFAULT + +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown 
-fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s + +// Test target parallel for codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK + +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK +// RUN: %clang_cc1 -fopenmp -x c++ 
-std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK + +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu 
-std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s + +// Check that no target code is emitted if no omptests flag was provided. +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK-NTARGET-OMP-DEFAULT + +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + + + + + +// We have 7 target regions + + + + + +// We have 4 initializers, one for the 500 priority, another one for 501, one more for the default priority, and the last one for the offloading registration function. + + +extern int *R; + +struct SA { + int arr[4]; + void foo() { + int a = *R; + a += 1; + *R = a; + } + SA() { + int a = *R; + a += 2; + *R = a; + } + ~SA() { + int a = *R; + a += 3; + *R = a; + } +}; + +struct SB { + int arr[8]; + void foo() { + int a = *R; + #pragma omp target parallel loop + for (int i = 0; i < 10; ++i) + a += 4; + *R = a; + } + SB() { + int a = *R; + a += 5; + *R = a; + } + ~SB() { + int a = *R; + a += 6; + *R = a; + } +}; + +struct SC { + int arr[16]; + void foo() { + int a = *R; + a += 7; + *R = a; + } + SC() { + int a = *R; + #pragma omp target parallel loop + for (int i = 0; i < 10; ++i) + a += 8; + *R = a; + } + ~SC() { + int a = *R; + a += 9; + *R = a; + } +}; + +struct SD { + int arr[32]; + void foo() { + int a = *R; + a += 10; + *R = a; + } + SD() { + int a = *R; + a += 11; + *R = a; + } + ~SD() { + int a = *R; + #pragma omp target parallel loop + for (int i = 0; i < 10; ++i) + a += 12; + *R = a; + } +}; + +struct SE { + int arr[64]; + void foo() { + int a = *R; + #pragma omp target parallel loop if(target: 0) + for (int i = 0; i < 10; ++i) + a += 13; + *R = a; + } + SE() { + int a = *R; + #pragma omp target parallel loop + for (int i = 0; 
i < 10; ++i) + a += 14; + *R = a; + } + ~SE() { + int a = *R; + #pragma omp target parallel loop + for (int i = 0; i < 10; ++i) + a += 15; + *R = a; + } +}; + +template<int x> +struct ST { + int arr[128 + x]; + void foo() { + int a = *R; + #pragma omp target parallel loop + for (int i = 0; i < 10; ++i) + a += 16 + x; + *R = a; + } + ST() { + int a = *R; + #pragma omp target parallel loop + for (int i = 0; i < 10; ++i) + a += 17 + x; + *R = a; + } + ~ST() { + int a = *R; + #pragma omp target parallel loop + for (int i = 0; i < 10; ++i) + a += 18 + x; + *R = a; + } +}; + +// We have to make sure we use all the target regions: + + + + +// We have 2 initializers with priority 500 + +// We have 1 initializer with priority 501 + +// We have 6 initializers with default priority + +static __attribute__((init_priority(500))) SA a1; +SA a2; +SB __attribute__((init_priority(500))) b1; +SB __attribute__((init_priority(501))) b2; +static SC c1; +SD d1; +SE e1; +ST<100> t1; +ST<1000> t2; + + +int bar(int a){ + int r = a; + + a1.foo(); + a2.foo(); + b1.foo(); + b2.foo(); + c1.foo(); + d1.foo(); + e1.foo(); + t1.foo(); + t2.foo(); + + #pragma omp target parallel loop + for (int i = 0; i < 10; ++i) + ++r; + + return r + *R; +} + +// Check metadata is properly generated: + + +#endif +// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init +// CHECK-SAME: () #[[ATTR4:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @_ZN2SAC1Ev(ptr noundef nonnull align 4 dereferenceable(16) @_ZL2a1) +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SAD1Ev, ptr @_ZL2a1, ptr @__dso_handle) #[[ATTR3:[0-9]+]] +// CHECK-NEXT: ret void +// +// +// +// +// +// +// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.1 +// CHECK-SAME: () #[[ATTR4]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @_ZN2SAC1Ev(ptr noundef nonnull align 4 dereferenceable(16) @a2) +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SAD1Ev, ptr @a2, ptr @__dso_handle) #[[ATTR3]] +// CHECK-NEXT: ret 
void +// +// +// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.2 +// CHECK-SAME: () #[[ATTR4]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @_ZN2SBC1Ev(ptr noundef nonnull align 4 dereferenceable(32) @b1) +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SBD1Ev, ptr @b1, ptr @__dso_handle) #[[ATTR3]] +// CHECK-NEXT: ret void +// +// +// +// +// +// +// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.3 +// CHECK-SAME: () #[[ATTR4]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @_ZN2SBC1Ev(ptr noundef nonnull align 4 dereferenceable(32) @b2) +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SBD1Ev, ptr @b2, ptr @__dso_handle) #[[ATTR3]] +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.4 +// CHECK-SAME: () #[[ATTR4]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @_ZN2SCC1Ev(ptr noundef nonnull align 4 dereferenceable(64) @_ZL2c1) +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SCD1Ev, ptr @_ZL2c1, ptr @__dso_handle) #[[ATTR3]] +// CHECK-NEXT: ret void +// +// +// +// +// +// +// +// +// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.7 +// CHECK-SAME: () #[[ATTR4]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @_ZN2SDC1Ev(ptr noundef nonnull align 4 dereferenceable(128) @d1) +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SDD1Ev, ptr @d1, ptr @__dso_handle) #[[ATTR3]] +// CHECK-NEXT: ret void +// +// +// +// +// +// +// +// +// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.10 +// CHECK-SAME: () #[[ATTR4]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @_ZN2SEC1Ev(ptr noundef nonnull align 4 dereferenceable(256) @e1) +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SED1Ev, ptr @e1, ptr @__dso_handle) #[[ATTR3]] +// CHECK-NEXT: ret void +// +// +// +// +// +// +// +// +// +// +// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.15 +// CHECK-SAME: () #[[ATTR4]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void 
@_ZN2STILi100EEC1Ev(ptr noundef nonnull align 4 dereferenceable(912) @t1) +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2STILi100EED1Ev, ptr @t1, ptr @__dso_handle) #[[ATTR3]] +// CHECK-NEXT: ret void +// +// +// +// +// +// +// +// +// +// +// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init.20 +// CHECK-SAME: () #[[ATTR4]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @_ZN2STILi1000EEC1Ev(ptr noundef nonnull align 4 dereferenceable(4512) @t2) +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2STILi1000EED1Ev, ptr @t2, ptr @__dso_handle) #[[ATTR3]] +// CHECK-NEXT: ret void +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// CHECK-LABEL: define {{[^@]+}}@_GLOBAL__I_000500 +// CHECK-SAME: () #[[ATTR4]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @__cxx_global_var_init() +// CHECK-NEXT: call void @__cxx_global_var_init.2() +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_GLOBAL__I_000501 +// CHECK-SAME: () #[[ATTR4]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @__cxx_global_var_init.3() +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_target_parallel_generic_loop_codegen_1.cpp +// CHECK-SAME: () #[[ATTR4]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @__cxx_global_var_init.1() +// CHECK-NEXT: call void @__cxx_global_var_init.4() +// CHECK-NEXT: call void @__cxx_global_var_init.7() +// CHECK-NEXT: call void @__cxx_global_var_init.10() +// CHECK-NEXT: call void @__cxx_global_var_init.15() +// CHECK-NEXT: call void @__cxx_global_var_init.20() +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK-SAME: () #[[ATTR4]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK-NEXT: ret void +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// 
+// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@__cxx_global_var_init +// SIMD-ONLY0-SAME: () #[[ATTR0:[0-9]+]] { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: call void @_ZN2SAC1Ev(ptr noundef nonnull align 4 dereferenceable(16) @_ZL2a1) +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SAD1Ev, ptr @_ZL2a1, ptr @__dso_handle) #[[ATTR2:[0-9]+]] +// SIMD-ONLY0-NEXT: ret void +// +// +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@__cxx_global_var_init.1 +// SIMD-ONLY0-SAME: () #[[ATTR0]] { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: call void @_ZN2SAC1Ev(ptr noundef nonnull align 4 dereferenceable(16) @a2) +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SAD1Ev, ptr @a2, ptr @__dso_handle) #[[ATTR2]] +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@__cxx_global_var_init.2 +// SIMD-ONLY0-SAME: () #[[ATTR0]] { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: call void @_ZN2SBC1Ev(ptr noundef nonnull align 4 dereferenceable(32) @b1) +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SBD1Ev, ptr @b1, ptr @__dso_handle) #[[ATTR2]] +// SIMD-ONLY0-NEXT: ret void +// +// +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@__cxx_global_var_init.3 +// SIMD-ONLY0-SAME: () #[[ATTR0]] { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: call void @_ZN2SBC1Ev(ptr noundef nonnull align 4 dereferenceable(32) @b2) +// SIMD-ONLY0-NEXT: 
[[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SBD1Ev, ptr @b2, ptr @__dso_handle) #[[ATTR2]] +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@__cxx_global_var_init.4 +// SIMD-ONLY0-SAME: () #[[ATTR0]] { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: call void @_ZN2SCC1Ev(ptr noundef nonnull align 4 dereferenceable(64) @_ZL2c1) +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SCD1Ev, ptr @_ZL2c1, ptr @__dso_handle) #[[ATTR2]] +// SIMD-ONLY0-NEXT: ret void +// +// +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@__cxx_global_var_init.5 +// SIMD-ONLY0-SAME: () #[[ATTR0]] { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: call void @_ZN2SDC1Ev(ptr noundef nonnull align 4 dereferenceable(128) @d1) +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SDD1Ev, ptr @d1, ptr @__dso_handle) #[[ATTR2]] +// SIMD-ONLY0-NEXT: ret void +// +// +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@__cxx_global_var_init.6 +// SIMD-ONLY0-SAME: () #[[ATTR0]] { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: call void @_ZN2SEC1Ev(ptr noundef nonnull align 4 dereferenceable(256) @e1) +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SED1Ev, ptr @e1, ptr @__dso_handle) #[[ATTR2]] +// SIMD-ONLY0-NEXT: ret void +// +// +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@__cxx_global_var_init.7 +// SIMD-ONLY0-SAME: () #[[ATTR0]] { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: call void @_ZN2STILi100EEC1Ev(ptr noundef nonnull align 4 dereferenceable(912) @t1) +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2STILi100EED1Ev, ptr @t1, ptr @__dso_handle) #[[ATTR2]] +// SIMD-ONLY0-NEXT: ret void +// +// +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@__cxx_global_var_init.8 +// SIMD-ONLY0-SAME: () #[[ATTR0]] { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: call void @_ZN2STILi1000EEC1Ev(ptr noundef nonnull align 4 dereferenceable(4512) @t2) +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr 
@_ZN2STILi1000EED1Ev, ptr @t2, ptr @__dso_handle) #[[ATTR2]] +// SIMD-ONLY0-NEXT: ret void +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_GLOBAL__I_000500 +// SIMD-ONLY0-SAME: () #[[ATTR0]] { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: call void @__cxx_global_var_init() +// SIMD-ONLY0-NEXT: call void @__cxx_global_var_init.2() +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_GLOBAL__I_000501 +// SIMD-ONLY0-SAME: () #[[ATTR0]] { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: call void @__cxx_global_var_init.3() +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_target_parallel_generic_loop_codegen_1.cpp +// SIMD-ONLY0-SAME: () #[[ATTR0]] { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: call void @__cxx_global_var_init.1() +// SIMD-ONLY0-NEXT: call void @__cxx_global_var_init.4() +// SIMD-ONLY0-NEXT: call void @__cxx_global_var_init.5() +// SIMD-ONLY0-NEXT: call void @__cxx_global_var_init.6() +// SIMD-ONLY0-NEXT: call void @__cxx_global_var_init.7() +// SIMD-ONLY0-NEXT: call void @__cxx_global_var_init.8() +// SIMD-ONLY0-NEXT: ret void +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// 
+// +// +// +// +// +// +// +// +// +// +// +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@__cxx_global_var_init +// SIMD-ONLY1-SAME: () #[[ATTR0:[0-9]+]] { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: call void @_ZN2SAC1Ev(ptr noundef nonnull align 4 dereferenceable(16) @_ZL2a1) +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SAD1Ev, ptr @_ZL2a1, ptr @__dso_handle) #[[ATTR2:[0-9]+]] +// SIMD-ONLY1-NEXT: ret void +// +// +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@__cxx_global_var_init.1 +// SIMD-ONLY1-SAME: () #[[ATTR0]] { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: call void @_ZN2SAC1Ev(ptr noundef nonnull align 4 dereferenceable(16) @a2) +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SAD1Ev, ptr @a2, ptr @__dso_handle) #[[ATTR2]] +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@__cxx_global_var_init.2 +// SIMD-ONLY1-SAME: () #[[ATTR0]] { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: call void @_ZN2SBC1Ev(ptr noundef nonnull align 4 dereferenceable(32) @b1) +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SBD1Ev, ptr @b1, ptr @__dso_handle) #[[ATTR2]] +// SIMD-ONLY1-NEXT: ret void +// +// +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@__cxx_global_var_init.3 +// SIMD-ONLY1-SAME: () #[[ATTR0]] { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: call void @_ZN2SBC1Ev(ptr noundef nonnull align 4 dereferenceable(32) @b2) +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SBD1Ev, ptr @b2, ptr @__dso_handle) #[[ATTR2]] +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@__cxx_global_var_init.4 +// SIMD-ONLY1-SAME: () #[[ATTR0]] { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: call void @_ZN2SCC1Ev(ptr noundef nonnull align 4 dereferenceable(64) @_ZL2c1) +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SCD1Ev, ptr @_ZL2c1, ptr @__dso_handle) #[[ATTR2]] +// SIMD-ONLY1-NEXT: ret void +// +// +// +// +// 
SIMD-ONLY1-LABEL: define {{[^@]+}}@__cxx_global_var_init.5 +// SIMD-ONLY1-SAME: () #[[ATTR0]] { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: call void @_ZN2SDC1Ev(ptr noundef nonnull align 4 dereferenceable(128) @d1) +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SDD1Ev, ptr @d1, ptr @__dso_handle) #[[ATTR2]] +// SIMD-ONLY1-NEXT: ret void +// +// +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@__cxx_global_var_init.6 +// SIMD-ONLY1-SAME: () #[[ATTR0]] { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: call void @_ZN2SEC1Ev(ptr noundef nonnull align 4 dereferenceable(256) @e1) +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SED1Ev, ptr @e1, ptr @__dso_handle) #[[ATTR2]] +// SIMD-ONLY1-NEXT: ret void +// +// +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@__cxx_global_var_init.7 +// SIMD-ONLY1-SAME: () #[[ATTR0]] { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: call void @_ZN2STILi100EEC1Ev(ptr noundef nonnull align 4 dereferenceable(912) @t1) +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2STILi100EED1Ev, ptr @t1, ptr @__dso_handle) #[[ATTR2]] +// SIMD-ONLY1-NEXT: ret void +// +// +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@__cxx_global_var_init.8 +// SIMD-ONLY1-SAME: () #[[ATTR0]] { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: call void @_ZN2STILi1000EEC1Ev(ptr noundef nonnull align 4 dereferenceable(4512) @t2) +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2STILi1000EED1Ev, ptr @t2, ptr @__dso_handle) #[[ATTR2]] +// SIMD-ONLY1-NEXT: ret void +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_GLOBAL__I_000500 +// SIMD-ONLY1-SAME: () #[[ATTR0]] { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: call void @__cxx_global_var_init() +// SIMD-ONLY1-NEXT: call void @__cxx_global_var_init.2() +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_GLOBAL__I_000501 +// 
SIMD-ONLY1-SAME: () #[[ATTR0]] { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: call void @__cxx_global_var_init.3() +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_target_parallel_generic_loop_codegen_1.cpp +// SIMD-ONLY1-SAME: () #[[ATTR0]] { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: call void @__cxx_global_var_init.1() +// SIMD-ONLY1-NEXT: call void @__cxx_global_var_init.4() +// SIMD-ONLY1-NEXT: call void @__cxx_global_var_init.5() +// SIMD-ONLY1-NEXT: call void @__cxx_global_var_init.6() +// SIMD-ONLY1-NEXT: call void @__cxx_global_var_init.7() +// SIMD-ONLY1-NEXT: call void @__cxx_global_var_init.8() +// SIMD-ONLY1-NEXT: ret void +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_Z3bari +// CHECK-NTARGET-SAME: (i32 noundef signext [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[R:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[R_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP0]], ptr [[R]], align 4 +// CHECK-NTARGET-NEXT: call void @_ZN2SA3fooEv(ptr noundef nonnull align 4 dereferenceable(16) @_ZL2a1) +// CHECK-NTARGET-NEXT: call void @_ZN2SA3fooEv(ptr noundef nonnull align 4 dereferenceable(16) @a2) +// CHECK-NTARGET-NEXT: call void @_ZN2SB3fooEv(ptr noundef nonnull align 4 dereferenceable(32) @b1) +// 
CHECK-NTARGET-NEXT: call void @_ZN2SB3fooEv(ptr noundef nonnull align 4 dereferenceable(32) @b2) +// CHECK-NTARGET-NEXT: call void @_ZN2SC3fooEv(ptr noundef nonnull align 4 dereferenceable(64) @_ZL2c1) +// CHECK-NTARGET-NEXT: call void @_ZN2SD3fooEv(ptr noundef nonnull align 4 dereferenceable(128) @d1) +// CHECK-NTARGET-NEXT: call void @_ZN2SE3fooEv(ptr noundef nonnull align 4 dereferenceable(256) @e1) +// CHECK-NTARGET-NEXT: call void @_ZN2STILi100EE3fooEv(ptr noundef nonnull align 4 dereferenceable(912) @t1) +// CHECK-NTARGET-NEXT: call void @_ZN2STILi1000EE3fooEv(ptr noundef nonnull align 4 dereferenceable(4512) @t2) +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[R]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP1]], ptr [[R_CASTED]], align 4 +// CHECK-NTARGET-NEXT: [[TMP2:%.*]] = load i64, ptr [[R_CASTED]], align 8 +// CHECK-NTARGET-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3bari_l267(i64 [[TMP2]]) #[[ATTR2:[0-9]+]] +// CHECK-NTARGET-NEXT: [[TMP3:%.*]] = load i32, ptr [[R]], align 4 +// CHECK-NTARGET-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-NTARGET-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], [[TMP5]] +// CHECK-NTARGET-NEXT: ret i32 [[ADD]] +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_ZN2SA3fooEv +// CHECK-NTARGET-SAME: (ptr noundef nonnull align 4 dereferenceable(16) [[THIS:%.*]]) #[[ATTR0]] comdat align 2 { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP2:%.*]] = load i32, 
ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 +// CHECK-NTARGET-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_ZN2SB3fooEv +// CHECK-NTARGET-SAME: (ptr noundef nonnull align 4 dereferenceable(32) [[THIS:%.*]]) #[[ATTR0]] comdat align 2 { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-NEXT: [[TMP3:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SB3fooEv_l122(i64 [[TMP3]]) #[[ATTR2]] +// CHECK-NTARGET-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP5:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4 +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_ZN2SC3fooEv +// CHECK-NTARGET-SAME: (ptr noundef nonnull align 4 dereferenceable(64) [[THIS:%.*]]) #[[ATTR0]] comdat align 2 { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[A:%.*]] = alloca i32, 
align 4 +// CHECK-NTARGET-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 7 +// CHECK-NTARGET-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_ZN2SD3fooEv +// CHECK-NTARGET-SAME: (ptr noundef nonnull align 4 dereferenceable(128) [[THIS:%.*]]) #[[ATTR0]] comdat align 2 { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 10 +// CHECK-NTARGET-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_ZN2SE3fooEv +// CHECK-NTARGET-SAME: (ptr noundef nonnull align 4 
dereferenceable(256) [[THIS:%.*]]) #[[ATTR0]] comdat align 2 { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-NEXT: [[TMP3:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SE3fooEv_l185(i64 [[TMP3]]) #[[ATTR2]] +// CHECK-NTARGET-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP5:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4 +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_ZN2STILi100EE3fooEv +// CHECK-NTARGET-SAME: (ptr noundef nonnull align 4 dereferenceable(912) [[THIS:%.*]]) #[[ATTR0]] comdat align 2 { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], 
align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-NEXT: [[TMP3:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EE3fooEv_l211(i64 [[TMP3]]) #[[ATTR2]] +// CHECK-NTARGET-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP5:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4 +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_ZN2STILi1000EE3fooEv +// CHECK-NTARGET-SAME: (ptr noundef nonnull align 4 dereferenceable(4512) [[THIS:%.*]]) #[[ATTR0]] comdat align 2 { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-NEXT: [[TMP3:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EE3fooEv_l211(i64 [[TMP3]]) #[[ATTR2]] +// CHECK-NTARGET-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP5:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4 +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3bari_l267 +// CHECK-NTARGET-SAME: (i64 noundef [[R:%.*]]) 
#[[ATTR1:[0-9]+]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[R_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: [[R_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: store i64 [[R]], ptr [[R_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load i32, ptr [[R_ADDR]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP0]], ptr [[R_CASTED]], align 4 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i64, ptr [[R_CASTED]], align 8 +// CHECK-NTARGET-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3bari_l267.omp_outlined, i64 [[TMP1]]) +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3bari_l267.omp_outlined +// CHECK-NTARGET-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[R:%.*]]) #[[ATTR1]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[R_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: store i64 [[R]], ptr [[R_ADDR]], align 8 +// CHECK-NTARGET-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 
4 +// CHECK-NTARGET-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NTARGET-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-NTARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-NTARGET-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-NTARGET: cond.true: +// CHECK-NTARGET-NEXT: br label [[COND_END:%.*]] +// CHECK-NTARGET: cond.false: +// CHECK-NTARGET-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: br label [[COND_END]] +// CHECK-NTARGET: cond.end: +// CHECK-NTARGET-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-NTARGET-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-NTARGET: omp.inner.for.cond: +// CHECK-NTARGET-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-NTARGET-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-NTARGET: omp.inner.for.body: +// CHECK-NTARGET-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-NTARGET-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-NTARGET-NEXT: 
store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-NTARGET-NEXT: [[TMP8:%.*]] = load i32, ptr [[R_ADDR]], align 4 +// CHECK-NTARGET-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK-NTARGET-NEXT: store i32 [[INC]], ptr [[R_ADDR]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-NTARGET: omp.body.continue: +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-NTARGET: omp.inner.for.inc: +// CHECK-NTARGET-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK-NTARGET-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-NTARGET: omp.inner.for.end: +// CHECK-NTARGET-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-NTARGET: omp.loop.exit: +// CHECK-NTARGET-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@__cxx_global_var_init +// CHECK-NTARGET-SAME: () #[[ATTR3:[0-9]+]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: call void @_ZN2SAC1Ev(ptr noundef nonnull align 4 dereferenceable(16) @_ZL2a1) +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SAD1Ev, ptr @_ZL2a1, ptr @__dso_handle) #[[ATTR2]] +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_ZN2SAC1Ev +// CHECK-NTARGET-SAME: (ptr noundef nonnull align 4 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR4:[0-9]+]] comdat align 2 { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: call void @_ZN2SAC2Ev(ptr noundef nonnull align 4 dereferenceable(16) [[THIS1]]) +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_ZN2SAD1Ev +// 
CHECK-NTARGET-SAME: (ptr noundef nonnull align 4 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: call void @_ZN2SAD2Ev(ptr noundef nonnull align 4 dereferenceable(16) [[THIS1]]) #[[ATTR2]] +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_ZN2SAC2Ev +// CHECK-NTARGET-SAME: (ptr noundef nonnull align 4 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 2 +// CHECK-NTARGET-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_ZN2SAD2Ev +// CHECK-NTARGET-SAME: (ptr noundef nonnull align 4 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: store 
ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 3 +// CHECK-NTARGET-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@__cxx_global_var_init.1 +// CHECK-NTARGET-SAME: () #[[ATTR3]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: call void @_ZN2SAC1Ev(ptr noundef nonnull align 4 dereferenceable(16) @a2) +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SAD1Ev, ptr @a2, ptr @__dso_handle) #[[ATTR2]] +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@__cxx_global_var_init.2 +// CHECK-NTARGET-SAME: () #[[ATTR3]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: call void @_ZN2SBC1Ev(ptr noundef nonnull align 4 dereferenceable(32) @b1) +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SBD1Ev, ptr @b1, ptr @__dso_handle) #[[ATTR2]] +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_ZN2SBC1Ev +// CHECK-NTARGET-SAME: (ptr noundef nonnull align 4 dereferenceable(32) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: call void 
@_ZN2SBC2Ev(ptr noundef nonnull align 4 dereferenceable(32) [[THIS1]]) +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_ZN2SBD1Ev +// CHECK-NTARGET-SAME: (ptr noundef nonnull align 4 dereferenceable(32) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: call void @_ZN2SBD2Ev(ptr noundef nonnull align 4 dereferenceable(32) [[THIS1]]) #[[ATTR2]] +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_ZN2SBC2Ev +// CHECK-NTARGET-SAME: (ptr noundef nonnull align 4 dereferenceable(32) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 5 +// CHECK-NTARGET-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_ZN2SBD2Ev +// CHECK-NTARGET-SAME: (ptr noundef nonnull align 4 dereferenceable(32) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { +// 
CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 6 +// CHECK-NTARGET-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@__cxx_global_var_init.3 +// CHECK-NTARGET-SAME: () #[[ATTR3]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: call void @_ZN2SBC1Ev(ptr noundef nonnull align 4 dereferenceable(32) @b2) +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SBD1Ev, ptr @b2, ptr @__dso_handle) #[[ATTR2]] +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@__cxx_global_var_init.4 +// CHECK-NTARGET-SAME: () #[[ATTR3]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: call void @_ZN2SCC1Ev(ptr noundef nonnull align 4 dereferenceable(64) @_ZL2c1) +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SCD1Ev, ptr @_ZL2c1, ptr @__dso_handle) #[[ATTR2]] +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_ZN2SCC1Ev +// CHECK-NTARGET-SAME: (ptr noundef nonnull align 4 dereferenceable(64) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 
+// CHECK-NTARGET-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: call void @_ZN2SCC2Ev(ptr noundef nonnull align 4 dereferenceable(64) [[THIS1]]) +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_ZN2SCD1Ev +// CHECK-NTARGET-SAME: (ptr noundef nonnull align 4 dereferenceable(64) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: call void @_ZN2SCD2Ev(ptr noundef nonnull align 4 dereferenceable(64) [[THIS1]]) #[[ATTR2]] +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_ZN2SCC2Ev +// CHECK-NTARGET-SAME: (ptr noundef nonnull align 4 dereferenceable(64) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-NEXT: [[TMP3:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SCC1Ev_l148(i64 [[TMP3]]) #[[ATTR2]] +// CHECK-NTARGET-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP5:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4 +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SCC1Ev_l148 +// CHECK-NTARGET-SAME: (i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SCC1Ev_l148.omp_outlined, i64 [[TMP1]]) +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SCC1Ev_l148.omp_outlined +// CHECK-NTARGET-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NTARGET-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-NTARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-NTARGET-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-NTARGET: cond.true: +// CHECK-NTARGET-NEXT: br label [[COND_END:%.*]] +// CHECK-NTARGET: cond.false: +// CHECK-NTARGET-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: br label [[COND_END]] +// CHECK-NTARGET: cond.end: +// CHECK-NTARGET-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-NTARGET-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-NTARGET: omp.inner.for.cond: +// CHECK-NTARGET-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-NTARGET-NEXT: br i1 [[CMP1]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-NTARGET: omp.inner.for.body: +// CHECK-NTARGET-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-NTARGET-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-NTARGET-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-NTARGET-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 8 +// CHECK-NTARGET-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-NTARGET: omp.body.continue: +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-NTARGET: omp.inner.for.inc: +// CHECK-NTARGET-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK-NTARGET-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-NTARGET: omp.inner.for.end: +// CHECK-NTARGET-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-NTARGET: omp.loop.exit: +// CHECK-NTARGET-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_ZN2SCD2Ev +// CHECK-NTARGET-SAME: (ptr noundef nonnull align 4 dereferenceable(64) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 9 +// CHECK-NTARGET-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@__cxx_global_var_init.5 +// CHECK-NTARGET-SAME: () #[[ATTR3]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: call void @_ZN2SDC1Ev(ptr noundef nonnull align 4 dereferenceable(128) @d1) +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SDD1Ev, ptr @d1, ptr @__dso_handle) #[[ATTR2]] +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_ZN2SDC1Ev +// CHECK-NTARGET-SAME: (ptr noundef nonnull align 4 dereferenceable(128) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: call void @_ZN2SDC2Ev(ptr noundef nonnull align 4 dereferenceable(128) [[THIS1]]) +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_ZN2SDD1Ev +// CHECK-NTARGET-SAME: (ptr noundef nonnull align 4 dereferenceable(128) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: call void @_ZN2SDD2Ev(ptr noundef nonnull align 4 dereferenceable(128) [[THIS1]]) #[[ATTR2]] +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define 
{{[^@]+}}@_ZN2SDC2Ev +// CHECK-NTARGET-SAME: (ptr noundef nonnull align 4 dereferenceable(128) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 11 +// CHECK-NTARGET-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_ZN2SDD2Ev +// CHECK-NTARGET-SAME: (ptr noundef nonnull align 4 dereferenceable(128) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-NEXT: [[TMP3:%.*]] 
= load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SDD1Ev_l174(i64 [[TMP3]]) #[[ATTR2]] +// CHECK-NTARGET-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP5:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4 +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SDD1Ev_l174 +// CHECK-NTARGET-SAME: (i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SDD1Ev_l174.omp_outlined, i64 [[TMP1]]) +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SDD1Ev_l174.omp_outlined +// CHECK-NTARGET-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NTARGET-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// 
CHECK-NTARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-NTARGET-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-NTARGET: cond.true: +// CHECK-NTARGET-NEXT: br label [[COND_END:%.*]] +// CHECK-NTARGET: cond.false: +// CHECK-NTARGET-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: br label [[COND_END]] +// CHECK-NTARGET: cond.end: +// CHECK-NTARGET-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-NTARGET-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-NTARGET: omp.inner.for.cond: +// CHECK-NTARGET-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-NTARGET-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-NTARGET: omp.inner.for.body: +// CHECK-NTARGET-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-NTARGET-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-NTARGET-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-NTARGET-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 12 +// CHECK-NTARGET-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-NTARGET: omp.body.continue: +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-NTARGET: omp.inner.for.inc: +// CHECK-NTARGET-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK-NTARGET-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK-NTARGET-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-NTARGET: omp.inner.for.end: +// CHECK-NTARGET-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-NTARGET: omp.loop.exit: +// CHECK-NTARGET-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@__cxx_global_var_init.6 +// CHECK-NTARGET-SAME: () #[[ATTR3]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: call void @_ZN2SEC1Ev(ptr noundef nonnull align 4 dereferenceable(256) @e1) +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SED1Ev, ptr @e1, ptr @__dso_handle) #[[ATTR2]] +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_ZN2SEC1Ev +// CHECK-NTARGET-SAME: (ptr noundef nonnull align 4 dereferenceable(256) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: call void @_ZN2SEC2Ev(ptr noundef nonnull align 4 dereferenceable(256) [[THIS1]]) +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_ZN2SED1Ev +// CHECK-NTARGET-SAME: (ptr noundef nonnull align 4 dereferenceable(256) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: call void @_ZN2SED2Ev(ptr noundef nonnull align 4 dereferenceable(256) [[THIS1]]) #[[ATTR2]] +// CHECK-NTARGET-NEXT: ret void 
+// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_ZN2SEC2Ev +// CHECK-NTARGET-SAME: (ptr noundef nonnull align 4 dereferenceable(256) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-NEXT: [[TMP3:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SEC1Ev_l192(i64 [[TMP3]]) #[[ATTR2]] +// CHECK-NTARGET-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP5:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4 +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SEC1Ev_l192 +// CHECK-NTARGET-SAME: (i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SEC1Ev_l192.omp_outlined, i64 [[TMP1]]) +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SEC1Ev_l192.omp_outlined +// CHECK-NTARGET-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NTARGET-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// 
CHECK-NTARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-NTARGET-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-NTARGET: cond.true: +// CHECK-NTARGET-NEXT: br label [[COND_END:%.*]] +// CHECK-NTARGET: cond.false: +// CHECK-NTARGET-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: br label [[COND_END]] +// CHECK-NTARGET: cond.end: +// CHECK-NTARGET-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-NTARGET-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-NTARGET: omp.inner.for.cond: +// CHECK-NTARGET-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-NTARGET-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-NTARGET: omp.inner.for.body: +// CHECK-NTARGET-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-NTARGET-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-NTARGET-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-NTARGET-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 14 +// CHECK-NTARGET-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-NTARGET: omp.body.continue: +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-NTARGET: omp.inner.for.inc: +// CHECK-NTARGET-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK-NTARGET-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK-NTARGET-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-NTARGET: omp.inner.for.end: +// CHECK-NTARGET-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-NTARGET: omp.loop.exit: +// CHECK-NTARGET-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_ZN2SED2Ev +// CHECK-NTARGET-SAME: (ptr noundef nonnull align 4 dereferenceable(256) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-NEXT: [[TMP3:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SED1Ev_l199(i64 [[TMP3]]) #[[ATTR2]] +// CHECK-NTARGET-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP5:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4 +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SED1Ev_l199 +// CHECK-NTARGET-SAME: (i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[A_ADDR:%.*]] = 
alloca i64, align 8 +// CHECK-NTARGET-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SED1Ev_l199.omp_outlined, i64 [[TMP1]]) +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SED1Ev_l199.omp_outlined +// CHECK-NTARGET-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NTARGET-NEXT: 
store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-NTARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-NTARGET-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-NTARGET: cond.true: +// CHECK-NTARGET-NEXT: br label [[COND_END:%.*]] +// CHECK-NTARGET: cond.false: +// CHECK-NTARGET-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: br label [[COND_END]] +// CHECK-NTARGET: cond.end: +// CHECK-NTARGET-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-NTARGET-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-NTARGET: omp.inner.for.cond: +// CHECK-NTARGET-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-NTARGET-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-NTARGET: omp.inner.for.body: +// CHECK-NTARGET-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-NTARGET-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-NTARGET-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-NTARGET-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], 
align 4 +// CHECK-NTARGET-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 15 +// CHECK-NTARGET-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-NTARGET: omp.body.continue: +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-NTARGET: omp.inner.for.inc: +// CHECK-NTARGET-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK-NTARGET-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-NTARGET: omp.inner.for.end: +// CHECK-NTARGET-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-NTARGET: omp.loop.exit: +// CHECK-NTARGET-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@__cxx_global_var_init.7 +// CHECK-NTARGET-SAME: () #[[ATTR3]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: call void @_ZN2STILi100EEC1Ev(ptr noundef nonnull align 4 dereferenceable(912) @t1) +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2STILi100EED1Ev, ptr @t1, ptr @__dso_handle) #[[ATTR2]] +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_ZN2STILi100EEC1Ev +// CHECK-NTARGET-SAME: (ptr noundef nonnull align 4 dereferenceable(912) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: call void @_ZN2STILi100EEC2Ev(ptr noundef nonnull align 4 dereferenceable(912) [[THIS1]]) +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_ZN2STILi100EED1Ev +// CHECK-NTARGET-SAME: (ptr noundef nonnull align 4 dereferenceable(912) 
[[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: call void @_ZN2STILi100EED2Ev(ptr noundef nonnull align 4 dereferenceable(912) [[THIS1]]) #[[ATTR2]] +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_ZN2STILi100EEC2Ev +// CHECK-NTARGET-SAME: (ptr noundef nonnull align 4 dereferenceable(912) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-NEXT: [[TMP3:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EEC1Ev_l218(i64 [[TMP3]]) #[[ATTR2]] +// CHECK-NTARGET-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP5:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4 +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EEC1Ev_l218 +// CHECK-NTARGET-SAME: (i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-NTARGET-NEXT: entry: +// 
CHECK-NTARGET-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EEC1Ev_l218.omp_outlined, i64 [[TMP1]]) +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EEC1Ev_l218.omp_outlined +// CHECK-NTARGET-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: store i32 1, 
ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NTARGET-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-NTARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-NTARGET-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-NTARGET: cond.true: +// CHECK-NTARGET-NEXT: br label [[COND_END:%.*]] +// CHECK-NTARGET: cond.false: +// CHECK-NTARGET-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: br label [[COND_END]] +// CHECK-NTARGET: cond.end: +// CHECK-NTARGET-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-NTARGET-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-NTARGET: omp.inner.for.cond: +// CHECK-NTARGET-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-NTARGET-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-NTARGET: omp.inner.for.body: +// CHECK-NTARGET-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-NTARGET-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-NTARGET-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// 
CHECK-NTARGET-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 117 +// CHECK-NTARGET-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-NTARGET: omp.body.continue: +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-NTARGET: omp.inner.for.inc: +// CHECK-NTARGET-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK-NTARGET-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-NTARGET: omp.inner.for.end: +// CHECK-NTARGET-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-NTARGET: omp.loop.exit: +// CHECK-NTARGET-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_ZN2STILi100EED2Ev +// CHECK-NTARGET-SAME: (ptr noundef nonnull align 4 dereferenceable(912) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-NEXT: [[TMP3:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EED1Ev_l225(i64 [[TMP3]]) 
#[[ATTR2]] +// CHECK-NTARGET-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP5:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4 +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EED1Ev_l225 +// CHECK-NTARGET-SAME: (i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EED1Ev_l225.omp_outlined, i64 [[TMP1]]) +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EED1Ev_l225.omp_outlined +// CHECK-NTARGET-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// 
CHECK-NTARGET-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NTARGET-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-NTARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-NTARGET-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-NTARGET: cond.true: +// CHECK-NTARGET-NEXT: br label [[COND_END:%.*]] +// CHECK-NTARGET: cond.false: +// CHECK-NTARGET-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: br label [[COND_END]] +// CHECK-NTARGET: cond.end: +// CHECK-NTARGET-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-NTARGET-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-NTARGET: omp.inner.for.cond: +// CHECK-NTARGET-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: 
[[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-NTARGET-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-NTARGET: omp.inner.for.body: +// CHECK-NTARGET-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-NTARGET-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-NTARGET-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-NTARGET-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 118 +// CHECK-NTARGET-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-NTARGET: omp.body.continue: +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-NTARGET: omp.inner.for.inc: +// CHECK-NTARGET-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK-NTARGET-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-NTARGET: omp.inner.for.end: +// CHECK-NTARGET-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-NTARGET: omp.loop.exit: +// CHECK-NTARGET-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@__cxx_global_var_init.8 +// CHECK-NTARGET-SAME: () #[[ATTR3]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: call void @_ZN2STILi1000EEC1Ev(ptr noundef nonnull align 4 dereferenceable(4512) @t2) +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2STILi1000EED1Ev, ptr @t2, ptr @__dso_handle) #[[ATTR2]] +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_ZN2STILi1000EEC1Ev +// CHECK-NTARGET-SAME: (ptr noundef nonnull align 4 dereferenceable(4512) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { +// 
CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: call void @_ZN2STILi1000EEC2Ev(ptr noundef nonnull align 4 dereferenceable(4512) [[THIS1]]) +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_ZN2STILi1000EED1Ev +// CHECK-NTARGET-SAME: (ptr noundef nonnull align 4 dereferenceable(4512) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: call void @_ZN2STILi1000EED2Ev(ptr noundef nonnull align 4 dereferenceable(4512) [[THIS1]]) #[[ATTR2]] +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_ZN2STILi1000EEC2Ev +// CHECK-NTARGET-SAME: (ptr noundef nonnull align 4 dereferenceable(4512) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-NEXT: [[TMP3:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-NEXT: call void 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EEC1Ev_l218(i64 [[TMP3]]) #[[ATTR2]] +// CHECK-NTARGET-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP5:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4 +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EEC1Ev_l218 +// CHECK-NTARGET-SAME: (i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EEC1Ev_l218.omp_outlined, i64 [[TMP1]]) +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EEC1Ev_l218.omp_outlined +// CHECK-NTARGET-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 
4 +// CHECK-NTARGET-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NTARGET-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-NTARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-NTARGET-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-NTARGET: cond.true: +// CHECK-NTARGET-NEXT: br label [[COND_END:%.*]] +// CHECK-NTARGET: cond.false: +// CHECK-NTARGET-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: br label [[COND_END]] +// CHECK-NTARGET: cond.end: +// CHECK-NTARGET-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-NTARGET-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-NTARGET: omp.inner.for.cond: +// CHECK-NTARGET-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-NTARGET-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-NTARGET: omp.inner.for.body: +// CHECK-NTARGET-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-NTARGET-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-NTARGET-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-NTARGET-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1017 +// CHECK-NTARGET-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-NTARGET: omp.body.continue: +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-NTARGET: omp.inner.for.inc: +// CHECK-NTARGET-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK-NTARGET-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-NTARGET: omp.inner.for.end: +// CHECK-NTARGET-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-NTARGET: omp.loop.exit: +// CHECK-NTARGET-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_ZN2STILi1000EED2Ev +// CHECK-NTARGET-SAME: (ptr noundef nonnull align 4 dereferenceable(4512) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], 
align 8 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-NEXT: [[TMP3:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EED1Ev_l225(i64 [[TMP3]]) #[[ATTR2]] +// CHECK-NTARGET-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-NEXT: [[TMP5:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4 +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EED1Ev_l225 +// CHECK-NTARGET-SAME: (i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EED1Ev_l225.omp_outlined, i64 [[TMP1]]) +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EED1Ev_l225.omp_outlined +// CHECK-NTARGET-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NTARGET-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// 
CHECK-NTARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-NTARGET-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-NTARGET: cond.true: +// CHECK-NTARGET-NEXT: br label [[COND_END:%.*]] +// CHECK-NTARGET: cond.false: +// CHECK-NTARGET-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: br label [[COND_END]] +// CHECK-NTARGET: cond.end: +// CHECK-NTARGET-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-NTARGET-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-NTARGET: omp.inner.for.cond: +// CHECK-NTARGET-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-NTARGET-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-NTARGET: omp.inner.for.body: +// CHECK-NTARGET-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-NTARGET-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-NTARGET-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-NTARGET-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1018 +// CHECK-NTARGET-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-NTARGET: omp.body.continue: +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-NTARGET: omp.inner.for.inc: +// CHECK-NTARGET-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK-NTARGET-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK-NTARGET-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-NTARGET: omp.inner.for.end: +// CHECK-NTARGET-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-NTARGET: omp.loop.exit: +// CHECK-NTARGET-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SB3fooEv_l122 +// CHECK-NTARGET-SAME: (i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SB3fooEv_l122.omp_outlined, i64 [[TMP1]]) +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SB3fooEv_l122.omp_outlined +// CHECK-NTARGET-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NTARGET-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// 
CHECK-NTARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-NTARGET-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-NTARGET: cond.true: +// CHECK-NTARGET-NEXT: br label [[COND_END:%.*]] +// CHECK-NTARGET: cond.false: +// CHECK-NTARGET-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: br label [[COND_END]] +// CHECK-NTARGET: cond.end: +// CHECK-NTARGET-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-NTARGET-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-NTARGET: omp.inner.for.cond: +// CHECK-NTARGET-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-NTARGET-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-NTARGET: omp.inner.for.body: +// CHECK-NTARGET-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-NTARGET-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-NTARGET-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-NTARGET-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 4 +// CHECK-NTARGET-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-NTARGET: omp.body.continue: +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-NTARGET: omp.inner.for.inc: +// CHECK-NTARGET-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK-NTARGET-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK-NTARGET-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-NTARGET: omp.inner.for.end: +// CHECK-NTARGET-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-NTARGET: omp.loop.exit: +// CHECK-NTARGET-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SE3fooEv_l185 +// CHECK-NTARGET-SAME: (i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SE3fooEv_l185.omp_outlined, i64 [[TMP1]]) +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SE3fooEv_l185.omp_outlined +// CHECK-NTARGET-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NTARGET-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// 
CHECK-NTARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-NTARGET-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-NTARGET: cond.true: +// CHECK-NTARGET-NEXT: br label [[COND_END:%.*]] +// CHECK-NTARGET: cond.false: +// CHECK-NTARGET-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: br label [[COND_END]] +// CHECK-NTARGET: cond.end: +// CHECK-NTARGET-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-NTARGET-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-NTARGET: omp.inner.for.cond: +// CHECK-NTARGET-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-NTARGET-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-NTARGET: omp.inner.for.body: +// CHECK-NTARGET-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-NTARGET-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-NTARGET-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-NTARGET-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 13 +// CHECK-NTARGET-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-NTARGET: omp.body.continue: +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-NTARGET: omp.inner.for.inc: +// CHECK-NTARGET-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK-NTARGET-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK-NTARGET-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-NTARGET: omp.inner.for.end: +// CHECK-NTARGET-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-NTARGET: omp.loop.exit: +// CHECK-NTARGET-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EE3fooEv_l211 +// CHECK-NTARGET-SAME: (i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EE3fooEv_l211.omp_outlined, i64 [[TMP1]]) +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EE3fooEv_l211.omp_outlined +// CHECK-NTARGET-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NTARGET-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) 
+// CHECK-NTARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-NTARGET-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-NTARGET: cond.true: +// CHECK-NTARGET-NEXT: br label [[COND_END:%.*]] +// CHECK-NTARGET: cond.false: +// CHECK-NTARGET-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: br label [[COND_END]] +// CHECK-NTARGET: cond.end: +// CHECK-NTARGET-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-NTARGET-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-NTARGET: omp.inner.for.cond: +// CHECK-NTARGET-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-NTARGET-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-NTARGET: omp.inner.for.body: +// CHECK-NTARGET-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-NTARGET-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-NTARGET-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-NTARGET-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 116 +// CHECK-NTARGET-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-NTARGET: omp.body.continue: +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-NTARGET: omp.inner.for.inc: +// CHECK-NTARGET-NEXT: [[TMP9:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK-NTARGET-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-NTARGET: omp.inner.for.end: +// CHECK-NTARGET-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-NTARGET: omp.loop.exit: +// CHECK-NTARGET-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EE3fooEv_l211 +// CHECK-NTARGET-SAME: (i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EE3fooEv_l211.omp_outlined, i64 [[TMP1]]) +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EE3fooEv_l211.omp_outlined +// CHECK-NTARGET-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NTARGET-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-NTARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) 
+// CHECK-NTARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-NTARGET-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-NTARGET: cond.true: +// CHECK-NTARGET-NEXT: br label [[COND_END:%.*]] +// CHECK-NTARGET: cond.false: +// CHECK-NTARGET-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: br label [[COND_END]] +// CHECK-NTARGET: cond.end: +// CHECK-NTARGET-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-NTARGET-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-NTARGET: omp.inner.for.cond: +// CHECK-NTARGET-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-NTARGET-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-NTARGET: omp.inner.for.body: +// CHECK-NTARGET-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-NTARGET-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-NTARGET-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-NTARGET-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1016 +// CHECK-NTARGET-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-NTARGET: omp.body.continue: +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-NTARGET: omp.inner.for.inc: +// CHECK-NTARGET-NEXT: [[TMP9:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK-NTARGET-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-NTARGET: omp.inner.for.end: +// CHECK-NTARGET-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-NTARGET: omp.loop.exit: +// CHECK-NTARGET-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_GLOBAL__I_000500 +// CHECK-NTARGET-SAME: () #[[ATTR3]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: call void @__cxx_global_var_init() +// CHECK-NTARGET-NEXT: call void @__cxx_global_var_init.2() +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_GLOBAL__I_000501 +// CHECK-NTARGET-SAME: () #[[ATTR3]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: call void @__cxx_global_var_init.3() +// CHECK-NTARGET-NEXT: ret void +// +// +// CHECK-NTARGET-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_target_parallel_generic_loop_codegen_1.cpp +// CHECK-NTARGET-SAME: () #[[ATTR3]] { +// CHECK-NTARGET-NEXT: entry: +// CHECK-NTARGET-NEXT: call void @__cxx_global_var_init.1() +// CHECK-NTARGET-NEXT: call void @__cxx_global_var_init.4() +// CHECK-NTARGET-NEXT: call void @__cxx_global_var_init.5() +// CHECK-NTARGET-NEXT: call void @__cxx_global_var_init.6() +// CHECK-NTARGET-NEXT: call void @__cxx_global_var_init.7() +// CHECK-NTARGET-NEXT: call void @__cxx_global_var_init.8() +// CHECK-NTARGET-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@__cxx_global_var_init +// SIMD-ONLY2-SAME: () #[[ATTR0:[0-9]+]] { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: call void @_ZN2SAC1Ev(ptr noundef nonnull align 4 dereferenceable(16) @_ZL2a1) +// SIMD-ONLY2-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SAD1Ev, ptr @_ZL2a1, ptr @__dso_handle) #[[ATTR2:[0-9]+]] +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define 
{{[^@]+}}@_ZN2SAC1Ev +// SIMD-ONLY2-SAME: (ptr noundef nonnull align 4 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: call void @_ZN2SAC2Ev(ptr noundef nonnull align 4 dereferenceable(16) [[THIS1]]) +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_ZN2SAD1Ev +// SIMD-ONLY2-SAME: (ptr noundef nonnull align 4 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: call void @_ZN2SAD2Ev(ptr noundef nonnull align 4 dereferenceable(16) [[THIS1]]) #[[ATTR2]] +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@__cxx_global_var_init.1 +// SIMD-ONLY2-SAME: () #[[ATTR0]] { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: call void @_ZN2SAC1Ev(ptr noundef nonnull align 4 dereferenceable(16) @a2) +// SIMD-ONLY2-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SAD1Ev, ptr @a2, ptr @__dso_handle) #[[ATTR2]] +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@__cxx_global_var_init.2 +// SIMD-ONLY2-SAME: () #[[ATTR0]] { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: call void @_ZN2SBC1Ev(ptr noundef nonnull align 4 dereferenceable(32) @b1) +// SIMD-ONLY2-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SBD1Ev, ptr @b1, ptr @__dso_handle) #[[ATTR2]] +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_ZN2SBC1Ev +// SIMD-ONLY2-SAME: (ptr noundef nonnull align 4 dereferenceable(32) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat 
align 2 { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: call void @_ZN2SBC2Ev(ptr noundef nonnull align 4 dereferenceable(32) [[THIS1]]) +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_ZN2SBD1Ev +// SIMD-ONLY2-SAME: (ptr noundef nonnull align 4 dereferenceable(32) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: call void @_ZN2SBD2Ev(ptr noundef nonnull align 4 dereferenceable(32) [[THIS1]]) #[[ATTR2]] +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@__cxx_global_var_init.3 +// SIMD-ONLY2-SAME: () #[[ATTR0]] { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: call void @_ZN2SBC1Ev(ptr noundef nonnull align 4 dereferenceable(32) @b2) +// SIMD-ONLY2-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SBD1Ev, ptr @b2, ptr @__dso_handle) #[[ATTR2]] +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@__cxx_global_var_init.4 +// SIMD-ONLY2-SAME: () #[[ATTR0]] { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: call void @_ZN2SCC1Ev(ptr noundef nonnull align 4 dereferenceable(64) @_ZL2c1) +// SIMD-ONLY2-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SCD1Ev, ptr @_ZL2c1, ptr @__dso_handle) #[[ATTR2]] +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_ZN2SCC1Ev +// SIMD-ONLY2-SAME: (ptr noundef nonnull align 4 dereferenceable(64) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY2-NEXT: store ptr 
[[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: call void @_ZN2SCC2Ev(ptr noundef nonnull align 4 dereferenceable(64) [[THIS1]]) +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_ZN2SCD1Ev +// SIMD-ONLY2-SAME: (ptr noundef nonnull align 4 dereferenceable(64) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: call void @_ZN2SCD2Ev(ptr noundef nonnull align 4 dereferenceable(64) [[THIS1]]) #[[ATTR2]] +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@__cxx_global_var_init.5 +// SIMD-ONLY2-SAME: () #[[ATTR0]] { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: call void @_ZN2SDC1Ev(ptr noundef nonnull align 4 dereferenceable(128) @d1) +// SIMD-ONLY2-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SDD1Ev, ptr @d1, ptr @__dso_handle) #[[ATTR2]] +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_ZN2SDC1Ev +// SIMD-ONLY2-SAME: (ptr noundef nonnull align 4 dereferenceable(128) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: call void @_ZN2SDC2Ev(ptr noundef nonnull align 4 dereferenceable(128) [[THIS1]]) +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_ZN2SDD1Ev +// SIMD-ONLY2-SAME: (ptr noundef nonnull align 4 dereferenceable(128) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 
+// SIMD-ONLY2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: call void @_ZN2SDD2Ev(ptr noundef nonnull align 4 dereferenceable(128) [[THIS1]]) #[[ATTR2]] +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@__cxx_global_var_init.6 +// SIMD-ONLY2-SAME: () #[[ATTR0]] { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: call void @_ZN2SEC1Ev(ptr noundef nonnull align 4 dereferenceable(256) @e1) +// SIMD-ONLY2-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SED1Ev, ptr @e1, ptr @__dso_handle) #[[ATTR2]] +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_ZN2SEC1Ev +// SIMD-ONLY2-SAME: (ptr noundef nonnull align 4 dereferenceable(256) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: call void @_ZN2SEC2Ev(ptr noundef nonnull align 4 dereferenceable(256) [[THIS1]]) +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_ZN2SED1Ev +// SIMD-ONLY2-SAME: (ptr noundef nonnull align 4 dereferenceable(256) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: call void @_ZN2SED2Ev(ptr noundef nonnull align 4 dereferenceable(256) [[THIS1]]) #[[ATTR2]] +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@__cxx_global_var_init.7 +// SIMD-ONLY2-SAME: () #[[ATTR0]] { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: call void @_ZN2STILi100EEC1Ev(ptr noundef nonnull align 4 
dereferenceable(912) @t1) +// SIMD-ONLY2-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2STILi100EED1Ev, ptr @t1, ptr @__dso_handle) #[[ATTR2]] +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_ZN2STILi100EEC1Ev +// SIMD-ONLY2-SAME: (ptr noundef nonnull align 4 dereferenceable(912) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: call void @_ZN2STILi100EEC2Ev(ptr noundef nonnull align 4 dereferenceable(912) [[THIS1]]) +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_ZN2STILi100EED1Ev +// SIMD-ONLY2-SAME: (ptr noundef nonnull align 4 dereferenceable(912) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: call void @_ZN2STILi100EED2Ev(ptr noundef nonnull align 4 dereferenceable(912) [[THIS1]]) #[[ATTR2]] +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@__cxx_global_var_init.8 +// SIMD-ONLY2-SAME: () #[[ATTR0]] { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: call void @_ZN2STILi1000EEC1Ev(ptr noundef nonnull align 4 dereferenceable(4512) @t2) +// SIMD-ONLY2-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2STILi1000EED1Ev, ptr @t2, ptr @__dso_handle) #[[ATTR2]] +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_ZN2STILi1000EEC1Ev +// SIMD-ONLY2-SAME: (ptr noundef nonnull align 4 dereferenceable(4512) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 
+// SIMD-ONLY2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: call void @_ZN2STILi1000EEC2Ev(ptr noundef nonnull align 4 dereferenceable(4512) [[THIS1]]) +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_ZN2STILi1000EED1Ev +// SIMD-ONLY2-SAME: (ptr noundef nonnull align 4 dereferenceable(4512) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: call void @_ZN2STILi1000EED2Ev(ptr noundef nonnull align 4 dereferenceable(4512) [[THIS1]]) #[[ATTR2]] +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_Z3bari +// SIMD-ONLY2-SAME: (i32 noundef signext [[A:%.*]]) #[[ATTR3:[0-9]+]] { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: [[R:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// SIMD-ONLY2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// SIMD-ONLY2-NEXT: store i32 [[TMP0]], ptr [[R]], align 4 +// SIMD-ONLY2-NEXT: call void @_ZN2SA3fooEv(ptr noundef nonnull align 4 dereferenceable(16) @_ZL2a1) +// SIMD-ONLY2-NEXT: call void @_ZN2SA3fooEv(ptr noundef nonnull align 4 dereferenceable(16) @a2) +// SIMD-ONLY2-NEXT: call void @_ZN2SB3fooEv(ptr noundef nonnull align 4 dereferenceable(32) @b1) +// SIMD-ONLY2-NEXT: call void @_ZN2SB3fooEv(ptr noundef nonnull align 4 dereferenceable(32) @b2) +// SIMD-ONLY2-NEXT: call void @_ZN2SC3fooEv(ptr noundef nonnull align 4 dereferenceable(64) @_ZL2c1) +// SIMD-ONLY2-NEXT: call void @_ZN2SD3fooEv(ptr noundef nonnull align 4 dereferenceable(128) @d1) +// SIMD-ONLY2-NEXT: call 
void @_ZN2SE3fooEv(ptr noundef nonnull align 4 dereferenceable(256) @e1) +// SIMD-ONLY2-NEXT: call void @_ZN2STILi100EE3fooEv(ptr noundef nonnull align 4 dereferenceable(912) @t1) +// SIMD-ONLY2-NEXT: call void @_ZN2STILi1000EE3fooEv(ptr noundef nonnull align 4 dereferenceable(4512) @t2) +// SIMD-ONLY2-NEXT: store i32 0, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_COND:%.*]] +// SIMD-ONLY2: for.cond: +// SIMD-ONLY2-NEXT: [[TMP1:%.*]] = load i32, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 10 +// SIMD-ONLY2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// SIMD-ONLY2: for.body: +// SIMD-ONLY2-NEXT: [[TMP2:%.*]] = load i32, ptr [[R]], align 4 +// SIMD-ONLY2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 +// SIMD-ONLY2-NEXT: store i32 [[INC]], ptr [[R]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_INC:%.*]] +// SIMD-ONLY2: for.inc: +// SIMD-ONLY2-NEXT: [[TMP3:%.*]] = load i32, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: [[INC1:%.*]] = add nsw i32 [[TMP3]], 1 +// SIMD-ONLY2-NEXT: store i32 [[INC1]], ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]] +// SIMD-ONLY2: for.end: +// SIMD-ONLY2-NEXT: [[TMP4:%.*]] = load i32, ptr [[R]], align 4 +// SIMD-ONLY2-NEXT: [[TMP5:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// SIMD-ONLY2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP6]] +// SIMD-ONLY2-NEXT: ret i32 [[ADD]] +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_ZN2SA3fooEv +// SIMD-ONLY2-SAME: (ptr noundef nonnull align 4 dereferenceable(16) [[THIS:%.*]]) #[[ATTR3]] comdat align 2 { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY2-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 
8 +// SIMD-ONLY2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// SIMD-ONLY2-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 +// SIMD-ONLY2-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_ZN2SB3fooEv +// SIMD-ONLY2-SAME: (ptr noundef nonnull align 4 dereferenceable(32) [[THIS:%.*]]) #[[ATTR3]] comdat align 2 { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY2-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// SIMD-ONLY2-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: store i32 0, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_COND:%.*]] +// SIMD-ONLY2: for.cond: +// SIMD-ONLY2-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10 +// SIMD-ONLY2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// SIMD-ONLY2: for.body: +// SIMD-ONLY2-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 4 +// SIMD-ONLY2-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_INC:%.*]] +// SIMD-ONLY2: for.inc: +// SIMD-ONLY2-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// SIMD-ONLY2-NEXT: store 
i32 [[INC]], ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// SIMD-ONLY2: for.end: +// SIMD-ONLY2-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP6:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: store i32 [[TMP5]], ptr [[TMP6]], align 4 +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_ZN2SC3fooEv +// SIMD-ONLY2-SAME: (ptr noundef nonnull align 4 dereferenceable(64) [[THIS:%.*]]) #[[ATTR3]] comdat align 2 { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY2-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// SIMD-ONLY2-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 7 +// SIMD-ONLY2-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_ZN2SD3fooEv +// SIMD-ONLY2-SAME: (ptr noundef nonnull align 4 dereferenceable(128) [[THIS:%.*]]) #[[ATTR3]] comdat align 2 { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY2-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// 
SIMD-ONLY2-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 10 +// SIMD-ONLY2-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_ZN2SE3fooEv +// SIMD-ONLY2-SAME: (ptr noundef nonnull align 4 dereferenceable(256) [[THIS:%.*]]) #[[ATTR3]] comdat align 2 { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY2-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// SIMD-ONLY2-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: store i32 0, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_COND:%.*]] +// SIMD-ONLY2: for.cond: +// SIMD-ONLY2-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10 +// SIMD-ONLY2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// SIMD-ONLY2: for.body: +// SIMD-ONLY2-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 13 +// SIMD-ONLY2-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_INC:%.*]] +// SIMD-ONLY2: for.inc: +// SIMD-ONLY2-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// SIMD-ONLY2-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: br label 
[[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// SIMD-ONLY2: for.end: +// SIMD-ONLY2-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP6:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: store i32 [[TMP5]], ptr [[TMP6]], align 4 +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_ZN2STILi100EE3fooEv +// SIMD-ONLY2-SAME: (ptr noundef nonnull align 4 dereferenceable(912) [[THIS:%.*]]) #[[ATTR3]] comdat align 2 { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY2-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// SIMD-ONLY2-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: store i32 0, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_COND:%.*]] +// SIMD-ONLY2: for.cond: +// SIMD-ONLY2-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10 +// SIMD-ONLY2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// SIMD-ONLY2: for.body: +// SIMD-ONLY2-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 116 +// SIMD-ONLY2-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_INC:%.*]] +// SIMD-ONLY2: for.inc: +// SIMD-ONLY2-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// SIMD-ONLY2-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// SIMD-ONLY2: for.end: +// SIMD-ONLY2-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP6:%.*]] = load 
ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: store i32 [[TMP5]], ptr [[TMP6]], align 4 +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_ZN2STILi1000EE3fooEv +// SIMD-ONLY2-SAME: (ptr noundef nonnull align 4 dereferenceable(4512) [[THIS:%.*]]) #[[ATTR3]] comdat align 2 { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY2-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// SIMD-ONLY2-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: store i32 0, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_COND:%.*]] +// SIMD-ONLY2: for.cond: +// SIMD-ONLY2-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10 +// SIMD-ONLY2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// SIMD-ONLY2: for.body: +// SIMD-ONLY2-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1016 +// SIMD-ONLY2-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_INC:%.*]] +// SIMD-ONLY2: for.inc: +// SIMD-ONLY2-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// SIMD-ONLY2-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// SIMD-ONLY2: for.end: +// SIMD-ONLY2-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP6:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: store i32 [[TMP5]], ptr [[TMP6]], align 4 +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_ZN2SAC2Ev 
+// SIMD-ONLY2-SAME: (ptr noundef nonnull align 4 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY2-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// SIMD-ONLY2-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 2 +// SIMD-ONLY2-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_ZN2SAD2Ev +// SIMD-ONLY2-SAME: (ptr noundef nonnull align 4 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY2-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// SIMD-ONLY2-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 3 +// SIMD-ONLY2-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: store i32 
[[TMP3]], ptr [[TMP4]], align 4 +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_ZN2SBC2Ev +// SIMD-ONLY2-SAME: (ptr noundef nonnull align 4 dereferenceable(32) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY2-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// SIMD-ONLY2-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 5 +// SIMD-ONLY2-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_ZN2SBD2Ev +// SIMD-ONLY2-SAME: (ptr noundef nonnull align 4 dereferenceable(32) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY2-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// SIMD-ONLY2-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 6 +// SIMD-ONLY2-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP3:%.*]] = 
load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_ZN2SCC2Ev +// SIMD-ONLY2-SAME: (ptr noundef nonnull align 4 dereferenceable(64) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY2-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// SIMD-ONLY2-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: store i32 0, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_COND:%.*]] +// SIMD-ONLY2: for.cond: +// SIMD-ONLY2-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10 +// SIMD-ONLY2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// SIMD-ONLY2: for.body: +// SIMD-ONLY2-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 8 +// SIMD-ONLY2-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_INC:%.*]] +// SIMD-ONLY2: for.inc: +// SIMD-ONLY2-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// SIMD-ONLY2-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// SIMD-ONLY2: for.end: +// SIMD-ONLY2-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP6:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: store i32 [[TMP5]], ptr [[TMP6]], align 4 +// SIMD-ONLY2-NEXT: 
ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_ZN2SCD2Ev +// SIMD-ONLY2-SAME: (ptr noundef nonnull align 4 dereferenceable(64) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY2-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// SIMD-ONLY2-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 9 +// SIMD-ONLY2-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_ZN2SDC2Ev +// SIMD-ONLY2-SAME: (ptr noundef nonnull align 4 dereferenceable(128) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY2-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// SIMD-ONLY2-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 11 +// SIMD-ONLY2-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: 
[[TMP4:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_ZN2SDD2Ev +// SIMD-ONLY2-SAME: (ptr noundef nonnull align 4 dereferenceable(128) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY2-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// SIMD-ONLY2-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: store i32 0, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_COND:%.*]] +// SIMD-ONLY2: for.cond: +// SIMD-ONLY2-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10 +// SIMD-ONLY2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// SIMD-ONLY2: for.body: +// SIMD-ONLY2-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 12 +// SIMD-ONLY2-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_INC:%.*]] +// SIMD-ONLY2: for.inc: +// SIMD-ONLY2-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// SIMD-ONLY2-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// SIMD-ONLY2: for.end: +// SIMD-ONLY2-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP6:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: store i32 [[TMP5]], ptr [[TMP6]], align 4 +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define 
{{[^@]+}}@_ZN2SEC2Ev +// SIMD-ONLY2-SAME: (ptr noundef nonnull align 4 dereferenceable(256) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY2-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// SIMD-ONLY2-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: store i32 0, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_COND:%.*]] +// SIMD-ONLY2: for.cond: +// SIMD-ONLY2-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10 +// SIMD-ONLY2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// SIMD-ONLY2: for.body: +// SIMD-ONLY2-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 14 +// SIMD-ONLY2-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_INC:%.*]] +// SIMD-ONLY2: for.inc: +// SIMD-ONLY2-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// SIMD-ONLY2-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// SIMD-ONLY2: for.end: +// SIMD-ONLY2-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP6:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: store i32 [[TMP5]], ptr [[TMP6]], align 4 +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_ZN2SED2Ev +// SIMD-ONLY2-SAME: (ptr noundef nonnull align 4 dereferenceable(256) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// SIMD-ONLY2-NEXT: 
entry: +// SIMD-ONLY2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY2-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// SIMD-ONLY2-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: store i32 0, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_COND:%.*]] +// SIMD-ONLY2: for.cond: +// SIMD-ONLY2-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10 +// SIMD-ONLY2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// SIMD-ONLY2: for.body: +// SIMD-ONLY2-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 15 +// SIMD-ONLY2-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_INC:%.*]] +// SIMD-ONLY2: for.inc: +// SIMD-ONLY2-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// SIMD-ONLY2-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// SIMD-ONLY2: for.end: +// SIMD-ONLY2-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP6:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: store i32 [[TMP5]], ptr [[TMP6]], align 4 +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_ZN2STILi100EEC2Ev +// SIMD-ONLY2-SAME: (ptr noundef nonnull align 4 dereferenceable(912) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY2-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: [[I:%.*]] = alloca 
i32, align 4 +// SIMD-ONLY2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// SIMD-ONLY2-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: store i32 0, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_COND:%.*]] +// SIMD-ONLY2: for.cond: +// SIMD-ONLY2-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10 +// SIMD-ONLY2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// SIMD-ONLY2: for.body: +// SIMD-ONLY2-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 117 +// SIMD-ONLY2-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_INC:%.*]] +// SIMD-ONLY2: for.inc: +// SIMD-ONLY2-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// SIMD-ONLY2-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// SIMD-ONLY2: for.end: +// SIMD-ONLY2-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP6:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: store i32 [[TMP5]], ptr [[TMP6]], align 4 +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_ZN2STILi100EED2Ev +// SIMD-ONLY2-SAME: (ptr noundef nonnull align 4 dereferenceable(912) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY2-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// 
SIMD-ONLY2-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// SIMD-ONLY2-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: store i32 0, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_COND:%.*]] +// SIMD-ONLY2: for.cond: +// SIMD-ONLY2-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10 +// SIMD-ONLY2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// SIMD-ONLY2: for.body: +// SIMD-ONLY2-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 118 +// SIMD-ONLY2-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_INC:%.*]] +// SIMD-ONLY2: for.inc: +// SIMD-ONLY2-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// SIMD-ONLY2-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// SIMD-ONLY2: for.end: +// SIMD-ONLY2-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP6:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: store i32 [[TMP5]], ptr [[TMP6]], align 4 +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_ZN2STILi1000EEC2Ev +// SIMD-ONLY2-SAME: (ptr noundef nonnull align 4 dereferenceable(4512) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY2-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// SIMD-ONLY2-NEXT: store i32 
[[TMP1]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: store i32 0, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_COND:%.*]] +// SIMD-ONLY2: for.cond: +// SIMD-ONLY2-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10 +// SIMD-ONLY2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// SIMD-ONLY2: for.body: +// SIMD-ONLY2-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1017 +// SIMD-ONLY2-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_INC:%.*]] +// SIMD-ONLY2: for.inc: +// SIMD-ONLY2-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// SIMD-ONLY2-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// SIMD-ONLY2: for.end: +// SIMD-ONLY2-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP6:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: store i32 [[TMP5]], ptr [[TMP6]], align 4 +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_ZN2STILi1000EED2Ev +// SIMD-ONLY2-SAME: (ptr noundef nonnull align 4 dereferenceable(4512) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY2-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY2-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// SIMD-ONLY2-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: store i32 0, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_COND:%.*]] +// SIMD-ONLY2: for.cond: +// 
SIMD-ONLY2-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10 +// SIMD-ONLY2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// SIMD-ONLY2: for.body: +// SIMD-ONLY2-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1018 +// SIMD-ONLY2-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_INC:%.*]] +// SIMD-ONLY2: for.inc: +// SIMD-ONLY2-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// SIMD-ONLY2-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// SIMD-ONLY2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// SIMD-ONLY2: for.end: +// SIMD-ONLY2-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// SIMD-ONLY2-NEXT: [[TMP6:%.*]] = load ptr, ptr @R, align 8 +// SIMD-ONLY2-NEXT: store i32 [[TMP5]], ptr [[TMP6]], align 4 +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_GLOBAL__I_000500 +// SIMD-ONLY2-SAME: () #[[ATTR0]] { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: call void @__cxx_global_var_init() +// SIMD-ONLY2-NEXT: call void @__cxx_global_var_init.2() +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_GLOBAL__I_000501 +// SIMD-ONLY2-SAME: () #[[ATTR0]] { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: call void @__cxx_global_var_init.3() +// SIMD-ONLY2-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_target_parallel_generic_loop_codegen_1.cpp +// SIMD-ONLY2-SAME: () #[[ATTR0]] { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: call void @__cxx_global_var_init.1() +// SIMD-ONLY2-NEXT: call void @__cxx_global_var_init.4() +// SIMD-ONLY2-NEXT: call void @__cxx_global_var_init.5() +// SIMD-ONLY2-NEXT: call void @__cxx_global_var_init.6() +// SIMD-ONLY2-NEXT: call void @__cxx_global_var_init.7() +// SIMD-ONLY2-NEXT: call void @__cxx_global_var_init.8() 
+// SIMD-ONLY2-NEXT: ret void +// +// +// OMP-DEFAULT-LABEL: define {{[^@]+}}@__cxx_global_var_init +// OMP-DEFAULT-SAME: () #[[ATTR0:[0-9]+]] { +// OMP-DEFAULT-NEXT: entry: +// OMP-DEFAULT-NEXT: call void @_ZN2SAC1Ev(ptr noundef nonnull align 4 dereferenceable(16) @_ZL2a1) +// OMP-DEFAULT-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SAD1Ev, ptr @_ZL2a1, ptr @__dso_handle) #[[ATTR2:[0-9]+]] +// OMP-DEFAULT-NEXT: ret void +// +// +// +// +// +// +// OMP-DEFAULT-LABEL: define {{[^@]+}}@__cxx_global_var_init.1 +// OMP-DEFAULT-SAME: () #[[ATTR0]] { +// OMP-DEFAULT-NEXT: entry: +// OMP-DEFAULT-NEXT: call void @_ZN2SAC1Ev(ptr noundef nonnull align 4 dereferenceable(16) @a2) +// OMP-DEFAULT-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SAD1Ev, ptr @a2, ptr @__dso_handle) #[[ATTR2]] +// OMP-DEFAULT-NEXT: ret void +// +// +// OMP-DEFAULT-LABEL: define {{[^@]+}}@__cxx_global_var_init.2 +// OMP-DEFAULT-SAME: () #[[ATTR0]] { +// OMP-DEFAULT-NEXT: entry: +// OMP-DEFAULT-NEXT: call void @_ZN2SBC1Ev(ptr noundef nonnull align 4 dereferenceable(32) @b1) +// OMP-DEFAULT-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SBD1Ev, ptr @b1, ptr @__dso_handle) #[[ATTR2]] +// OMP-DEFAULT-NEXT: ret void +// +// +// +// +// +// +// OMP-DEFAULT-LABEL: define {{[^@]+}}@__cxx_global_var_init.3 +// OMP-DEFAULT-SAME: () #[[ATTR0]] { +// OMP-DEFAULT-NEXT: entry: +// OMP-DEFAULT-NEXT: call void @_ZN2SBC1Ev(ptr noundef nonnull align 4 dereferenceable(32) @b2) +// OMP-DEFAULT-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SBD1Ev, ptr @b2, ptr @__dso_handle) #[[ATTR2]] +// OMP-DEFAULT-NEXT: ret void +// +// +// OMP-DEFAULT-LABEL: define {{[^@]+}}@__cxx_global_var_init.4 +// OMP-DEFAULT-SAME: () #[[ATTR0]] { +// OMP-DEFAULT-NEXT: entry: +// OMP-DEFAULT-NEXT: call void @_ZN2SCC1Ev(ptr noundef nonnull align 4 dereferenceable(64) @_ZL2c1) +// OMP-DEFAULT-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SCD1Ev, ptr @_ZL2c1, ptr @__dso_handle) #[[ATTR2]] +// OMP-DEFAULT-NEXT: ret 
void +// +// +// +// +// +// +// +// +// OMP-DEFAULT-LABEL: define {{[^@]+}}@__cxx_global_var_init.5 +// OMP-DEFAULT-SAME: () #[[ATTR0]] { +// OMP-DEFAULT-NEXT: entry: +// OMP-DEFAULT-NEXT: call void @_ZN2SDC1Ev(ptr noundef nonnull align 4 dereferenceable(128) @d1) +// OMP-DEFAULT-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SDD1Ev, ptr @d1, ptr @__dso_handle) #[[ATTR2]] +// OMP-DEFAULT-NEXT: ret void +// +// +// +// +// +// +// +// +// OMP-DEFAULT-LABEL: define {{[^@]+}}@__cxx_global_var_init.8 +// OMP-DEFAULT-SAME: () #[[ATTR0]] { +// OMP-DEFAULT-NEXT: entry: +// OMP-DEFAULT-NEXT: call void @_ZN2SEC1Ev(ptr noundef nonnull align 4 dereferenceable(256) @e1) +// OMP-DEFAULT-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SED1Ev, ptr @e1, ptr @__dso_handle) #[[ATTR2]] +// OMP-DEFAULT-NEXT: ret void +// +// +// +// +// +// +// +// +// +// +// OMP-DEFAULT-LABEL: define {{[^@]+}}@__cxx_global_var_init.13 +// OMP-DEFAULT-SAME: () #[[ATTR0]] { +// OMP-DEFAULT-NEXT: entry: +// OMP-DEFAULT-NEXT: call void @_ZN2STILi100EEC1Ev(ptr noundef nonnull align 4 dereferenceable(912) @t1) +// OMP-DEFAULT-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2STILi100EED1Ev, ptr @t1, ptr @__dso_handle) #[[ATTR2]] +// OMP-DEFAULT-NEXT: ret void +// +// +// +// +// +// +// +// +// +// +// OMP-DEFAULT-LABEL: define {{[^@]+}}@__cxx_global_var_init.18 +// OMP-DEFAULT-SAME: () #[[ATTR0]] { +// OMP-DEFAULT-NEXT: entry: +// OMP-DEFAULT-NEXT: call void @_ZN2STILi1000EEC1Ev(ptr noundef nonnull align 4 dereferenceable(4512) @t2) +// OMP-DEFAULT-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2STILi1000EED1Ev, ptr @t2, ptr @__dso_handle) #[[ATTR2]] +// OMP-DEFAULT-NEXT: ret void +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// OMP-DEFAULT-LABEL: define {{[^@]+}}@_GLOBAL__I_000500 +// OMP-DEFAULT-SAME: () #[[ATTR0]] { +// OMP-DEFAULT-NEXT: entry: +// OMP-DEFAULT-NEXT: call void @__cxx_global_var_init() +// 
OMP-DEFAULT-NEXT: call void @__cxx_global_var_init.2() +// OMP-DEFAULT-NEXT: ret void +// +// +// OMP-DEFAULT-LABEL: define {{[^@]+}}@_GLOBAL__I_000501 +// OMP-DEFAULT-SAME: () #[[ATTR0]] { +// OMP-DEFAULT-NEXT: entry: +// OMP-DEFAULT-NEXT: call void @__cxx_global_var_init.3() +// OMP-DEFAULT-NEXT: ret void +// +// +// OMP-DEFAULT-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_target_parallel_generic_loop_codegen_1.cpp +// OMP-DEFAULT-SAME: () #[[ATTR0]] { +// OMP-DEFAULT-NEXT: entry: +// OMP-DEFAULT-NEXT: call void @__cxx_global_var_init.1() +// OMP-DEFAULT-NEXT: call void @__cxx_global_var_init.4() +// OMP-DEFAULT-NEXT: call void @__cxx_global_var_init.5() +// OMP-DEFAULT-NEXT: call void @__cxx_global_var_init.8() +// OMP-DEFAULT-NEXT: call void @__cxx_global_var_init.13() +// OMP-DEFAULT-NEXT: call void @__cxx_global_var_init.18() +// OMP-DEFAULT-NEXT: ret void +// +// +// OMP-DEFAULT-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// OMP-DEFAULT-SAME: () #[[ATTR0]] { +// OMP-DEFAULT-NEXT: entry: +// OMP-DEFAULT-NEXT: call void @__tgt_register_requires(i64 1) +// OMP-DEFAULT-NEXT: ret void +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@__cxx_global_var_init +// OMP-DEfAULT-SAME: () #[[ATTR0:[0-9]+]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: call void @_ZN2SAC1Ev(ptr noundef nonnull align 4 dereferenceable(16) @_ZL2a1) +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SAD1Ev, ptr @_ZL2a1, ptr @__dso_handle) #[[ATTR2:[0-9]+]] +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_ZN2SAC1Ev +// OMP-DEfAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 { +// OMP-DEfAULT-NEXT: entry: +// 
OMP-DEfAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: call void @_ZN2SAC2Ev(ptr noundef nonnull align 4 dereferenceable(16) [[THIS1]]) +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_ZN2SAD1Ev +// OMP-DEfAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: call void @_ZN2SAD2Ev(ptr noundef nonnull align 4 dereferenceable(16) [[THIS1]]) #[[ATTR2]] +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_ZN2SAC2Ev +// OMP-DEfAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 2 +// OMP-DEfAULT-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// OMP-DEfAULT-NEXT: ret void +// +// +// 
OMP-DEfAULT-LABEL: define {{[^@]+}}@_ZN2SAD2Ev +// OMP-DEfAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 3 +// OMP-DEfAULT-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@__cxx_global_var_init.1 +// OMP-DEfAULT-SAME: () #[[ATTR0]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: call void @_ZN2SAC1Ev(ptr noundef nonnull align 4 dereferenceable(16) @a2) +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SAD1Ev, ptr @a2, ptr @__dso_handle) #[[ATTR2]] +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@__cxx_global_var_init.2 +// OMP-DEfAULT-SAME: () #[[ATTR0]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: call void @_ZN2SBC1Ev(ptr noundef nonnull align 4 dereferenceable(32) @b1) +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SBD1Ev, ptr @b1, ptr @__dso_handle) #[[ATTR2]] +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_ZN2SBC1Ev +// OMP-DEfAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(32) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] 
comdat align 2 { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: call void @_ZN2SBC2Ev(ptr noundef nonnull align 4 dereferenceable(32) [[THIS1]]) +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_ZN2SBD1Ev +// OMP-DEfAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(32) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: call void @_ZN2SBD2Ev(ptr noundef nonnull align 4 dereferenceable(32) [[THIS1]]) #[[ATTR2]] +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_ZN2SBC2Ev +// OMP-DEfAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(32) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 5 +// OMP-DEfAULT-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 
+// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_ZN2SBD2Ev +// OMP-DEfAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(32) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 6 +// OMP-DEfAULT-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@__cxx_global_var_init.3 +// OMP-DEfAULT-SAME: () #[[ATTR0]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: call void @_ZN2SBC1Ev(ptr noundef nonnull align 4 dereferenceable(32) @b2) +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SBD1Ev, ptr @b2, ptr @__dso_handle) #[[ATTR2]] +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@__cxx_global_var_init.4 +// OMP-DEfAULT-SAME: () #[[ATTR0]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: call void @_ZN2SCC1Ev(ptr noundef nonnull align 4 dereferenceable(64) @_ZL2c1) +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SCD1Ev, ptr @_ZL2c1, ptr @__dso_handle) #[[ATTR2]] +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_ZN2SCC1Ev +// OMP-DEfAULT-SAME: (ptr noundef nonnull align 4 
dereferenceable(64) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: call void @_ZN2SCC2Ev(ptr noundef nonnull align 4 dereferenceable(64) [[THIS1]]) +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_ZN2SCD1Ev +// OMP-DEfAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(64) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: call void @_ZN2SCD2Ev(ptr noundef nonnull align 4 dereferenceable(64) [[THIS1]]) #[[ATTR2]] +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_ZN2SCC2Ev +// OMP-DEfAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(64) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 +// OMP-DEfAULT-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 +// OMP-DEfAULT-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 +// OMP-DEfAULT-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// OMP-DEfAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], 
align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// OMP-DEfAULT-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store i32 [[TMP3]], ptr [[TMP5]], align 4 +// OMP-DEfAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store ptr null, ptr [[TMP6]], align 4 +// OMP-DEfAULT-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store i32 2, ptr [[TMP9]], align 4 +// OMP-DEfAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// OMP-DEfAULT-NEXT: store i32 1, ptr [[TMP10]], align 4 +// OMP-DEfAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// OMP-DEfAULT-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 +// OMP-DEfAULT-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// OMP-DEfAULT-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 4 +// OMP-DEfAULT-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// OMP-DEfAULT-NEXT: store ptr @.offload_sizes, ptr [[TMP13]], align 4 +// 
OMP-DEfAULT-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// OMP-DEfAULT-NEXT: store ptr @.offload_maptypes, ptr [[TMP14]], align 4 +// OMP-DEfAULT-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// OMP-DEfAULT-NEXT: store ptr null, ptr [[TMP15]], align 4 +// OMP-DEfAULT-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// OMP-DEfAULT-NEXT: store ptr null, ptr [[TMP16]], align 4 +// OMP-DEfAULT-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// OMP-DEfAULT-NEXT: store i64 0, ptr [[TMP17]], align 8 +// OMP-DEfAULT-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// OMP-DEfAULT-NEXT: store i64 0, ptr [[TMP18]], align 8 +// OMP-DEfAULT-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// OMP-DEfAULT-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP19]], align 4 +// OMP-DEfAULT-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// OMP-DEfAULT-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP20]], align 4 +// OMP-DEfAULT-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[TMP21]], align 4 +// OMP-DEfAULT-NEXT: [[TMP22:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SCC1Ev_l148.region_id, ptr [[KERNEL_ARGS]]) +// OMP-DEfAULT-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// OMP-DEfAULT-NEXT: br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// OMP-DEfAULT: omp_offload.failed: +// 
OMP-DEfAULT-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SCC1Ev_l148(i32 [[TMP3]]) #[[ATTR2]] +// OMP-DEfAULT-NEXT: br label [[OMP_OFFLOAD_CONT]] +// OMP-DEfAULT: omp_offload.cont: +// OMP-DEfAULT-NEXT: [[TMP24:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP25:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP24]], ptr [[TMP25]], align 4 +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SCC1Ev_l148 +// OMP-DEfAULT-SAME: (i32 noundef [[A:%.*]]) #[[ATTR3:[0-9]+]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SCC1Ev_l148.omp_outlined, i32 [[TMP1]]) +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SCC1Ev_l148.omp_outlined +// OMP-DEfAULT-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR4:[0-9]+]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[I:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// OMP-DEfAULT-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP-DEfAULT-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// OMP-DEfAULT-NEXT: [[TMP2:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// OMP-DEfAULT-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// OMP-DEfAULT: cond.true: +// OMP-DEfAULT-NEXT: br label [[COND_END:%.*]] +// OMP-DEfAULT: cond.false: +// OMP-DEfAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: br label [[COND_END]] +// OMP-DEfAULT: cond.end: +// OMP-DEfAULT-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// OMP-DEfAULT-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// OMP-DEfAULT: omp.inner.for.cond: +// OMP-DEfAULT-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// OMP-DEfAULT-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// OMP-DEfAULT: omp.inner.for.body: +// OMP-DEfAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// OMP-DEfAULT-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// OMP-DEfAULT-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// OMP-DEfAULT-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 8 +// OMP-DEfAULT-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// OMP-DEfAULT: omp.body.continue: +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// OMP-DEfAULT: omp.inner.for.inc: +// OMP-DEfAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// OMP-DEfAULT-NEXT: store i32 [[ADD3]], ptr 
[[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_COND]] +// OMP-DEfAULT: omp.inner.for.end: +// OMP-DEfAULT-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// OMP-DEfAULT: omp.loop.exit: +// OMP-DEfAULT-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_ZN2SCD2Ev +// OMP-DEfAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(64) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 9 +// OMP-DEfAULT-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@__cxx_global_var_init.5 +// OMP-DEfAULT-SAME: () #[[ATTR0]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: call void @_ZN2SDC1Ev(ptr noundef nonnull align 4 dereferenceable(128) @d1) +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SDD1Ev, ptr @d1, ptr @__dso_handle) #[[ATTR2]] +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_ZN2SDC1Ev +// OMP-DEfAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(128) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// OMP-DEfAULT-NEXT: entry: +// 
OMP-DEfAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: call void @_ZN2SDC2Ev(ptr noundef nonnull align 4 dereferenceable(128) [[THIS1]]) +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_ZN2SDD1Ev +// OMP-DEfAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(128) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: call void @_ZN2SDD2Ev(ptr noundef nonnull align 4 dereferenceable(128) [[THIS1]]) #[[ATTR2]] +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_ZN2SDC2Ev +// OMP-DEfAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(128) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 11 +// OMP-DEfAULT-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// OMP-DEfAULT-NEXT: ret void +// +// +// 
OMP-DEfAULT-LABEL: define {{[^@]+}}@_ZN2SDD2Ev +// OMP-DEfAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(128) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 +// OMP-DEfAULT-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 +// OMP-DEfAULT-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 +// OMP-DEfAULT-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// OMP-DEfAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// OMP-DEfAULT-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store i32 [[TMP3]], ptr [[TMP5]], align 4 +// OMP-DEfAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store ptr null, ptr [[TMP6]], align 4 +// OMP-DEfAULT-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// 
OMP-DEfAULT-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store i32 2, ptr [[TMP9]], align 4 +// OMP-DEfAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// OMP-DEfAULT-NEXT: store i32 1, ptr [[TMP10]], align 4 +// OMP-DEfAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// OMP-DEfAULT-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 +// OMP-DEfAULT-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// OMP-DEfAULT-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 4 +// OMP-DEfAULT-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// OMP-DEfAULT-NEXT: store ptr @.offload_sizes.6, ptr [[TMP13]], align 4 +// OMP-DEfAULT-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// OMP-DEfAULT-NEXT: store ptr @.offload_maptypes.7, ptr [[TMP14]], align 4 +// OMP-DEfAULT-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// OMP-DEfAULT-NEXT: store ptr null, ptr [[TMP15]], align 4 +// OMP-DEfAULT-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// OMP-DEfAULT-NEXT: store ptr null, ptr [[TMP16]], align 4 +// OMP-DEfAULT-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// OMP-DEfAULT-NEXT: store i64 0, ptr [[TMP17]], align 8 +// OMP-DEfAULT-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// OMP-DEfAULT-NEXT: store i64 0, ptr [[TMP18]], align 8 +// OMP-DEfAULT-NEXT: [[TMP19:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// OMP-DEfAULT-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP19]], align 4 +// OMP-DEfAULT-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// OMP-DEfAULT-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP20]], align 4 +// OMP-DEfAULT-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[TMP21]], align 4 +// OMP-DEfAULT-NEXT: [[TMP22:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SDD1Ev_l174.region_id, ptr [[KERNEL_ARGS]]) +// OMP-DEfAULT-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// OMP-DEfAULT-NEXT: br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// OMP-DEfAULT: omp_offload.failed: +// OMP-DEfAULT-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SDD1Ev_l174(i32 [[TMP3]]) #[[ATTR2]] +// OMP-DEfAULT-NEXT: br label [[OMP_OFFLOAD_CONT]] +// OMP-DEfAULT: omp_offload.cont: +// OMP-DEfAULT-NEXT: [[TMP24:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP25:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP24]], ptr [[TMP25]], align 4 +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SDD1Ev_l174 +// OMP-DEfAULT-SAME: (i32 noundef [[A:%.*]]) #[[ATTR3]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: 
call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SDD1Ev_l174.omp_outlined, i32 [[TMP1]]) +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SDD1Ev_l174.omp_outlined +// OMP-DEfAULT-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR4]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[I:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// OMP-DEfAULT-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP-DEfAULT-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// OMP-DEfAULT-NEXT: [[TMP2:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// OMP-DEfAULT-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// OMP-DEfAULT: cond.true: +// OMP-DEfAULT-NEXT: br label [[COND_END:%.*]] +// OMP-DEfAULT: cond.false: +// OMP-DEfAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: br label [[COND_END]] +// OMP-DEfAULT: cond.end: +// OMP-DEfAULT-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// OMP-DEfAULT-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// OMP-DEfAULT: omp.inner.for.cond: +// OMP-DEfAULT-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// OMP-DEfAULT-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// OMP-DEfAULT: omp.inner.for.body: +// OMP-DEfAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// OMP-DEfAULT-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// OMP-DEfAULT-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// OMP-DEfAULT-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 12 +// OMP-DEfAULT-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// OMP-DEfAULT: omp.body.continue: +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// OMP-DEfAULT: omp.inner.for.inc: +// OMP-DEfAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// OMP-DEfAULT-NEXT: store 
i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_COND]] +// OMP-DEfAULT: omp.inner.for.end: +// OMP-DEfAULT-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// OMP-DEfAULT: omp.loop.exit: +// OMP-DEfAULT-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@__cxx_global_var_init.8 +// OMP-DEfAULT-SAME: () #[[ATTR0]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: call void @_ZN2SEC1Ev(ptr noundef nonnull align 4 dereferenceable(256) @e1) +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SED1Ev, ptr @e1, ptr @__dso_handle) #[[ATTR2]] +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_ZN2SEC1Ev +// OMP-DEfAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(256) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: call void @_ZN2SEC2Ev(ptr noundef nonnull align 4 dereferenceable(256) [[THIS1]]) +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_ZN2SED1Ev +// OMP-DEfAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(256) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: call void @_ZN2SED2Ev(ptr noundef nonnull align 4 dereferenceable(256) [[THIS1]]) #[[ATTR2]] +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_ZN2SEC2Ev +// OMP-DEfAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(256) [[THIS:%.*]]) unnamed_addr 
#[[ATTR1]] comdat align 2 { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 +// OMP-DEfAULT-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 +// OMP-DEfAULT-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 +// OMP-DEfAULT-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// OMP-DEfAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// OMP-DEfAULT-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store i32 [[TMP3]], ptr [[TMP5]], align 4 +// OMP-DEfAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store ptr null, ptr [[TMP6]], align 4 +// OMP-DEfAULT-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: 
store i32 2, ptr [[TMP9]], align 4 +// OMP-DEfAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// OMP-DEfAULT-NEXT: store i32 1, ptr [[TMP10]], align 4 +// OMP-DEfAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// OMP-DEfAULT-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 +// OMP-DEfAULT-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// OMP-DEfAULT-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 4 +// OMP-DEfAULT-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// OMP-DEfAULT-NEXT: store ptr @.offload_sizes.9, ptr [[TMP13]], align 4 +// OMP-DEfAULT-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// OMP-DEfAULT-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP14]], align 4 +// OMP-DEfAULT-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// OMP-DEfAULT-NEXT: store ptr null, ptr [[TMP15]], align 4 +// OMP-DEfAULT-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// OMP-DEfAULT-NEXT: store ptr null, ptr [[TMP16]], align 4 +// OMP-DEfAULT-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// OMP-DEfAULT-NEXT: store i64 0, ptr [[TMP17]], align 8 +// OMP-DEfAULT-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// OMP-DEfAULT-NEXT: store i64 0, ptr [[TMP18]], align 8 +// OMP-DEfAULT-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// OMP-DEfAULT-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP19]], align 4 
+// OMP-DEfAULT-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// OMP-DEfAULT-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP20]], align 4 +// OMP-DEfAULT-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[TMP21]], align 4 +// OMP-DEfAULT-NEXT: [[TMP22:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SEC1Ev_l192.region_id, ptr [[KERNEL_ARGS]]) +// OMP-DEfAULT-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// OMP-DEfAULT-NEXT: br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// OMP-DEfAULT: omp_offload.failed: +// OMP-DEfAULT-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SEC1Ev_l192(i32 [[TMP3]]) #[[ATTR2]] +// OMP-DEfAULT-NEXT: br label [[OMP_OFFLOAD_CONT]] +// OMP-DEfAULT: omp_offload.cont: +// OMP-DEfAULT-NEXT: [[TMP24:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP25:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP24]], ptr [[TMP25]], align 4 +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SEC1Ev_l192 +// OMP-DEfAULT-SAME: (i32 noundef [[A:%.*]]) #[[ATTR3]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SEC1Ev_l192.omp_outlined, i32 [[TMP1]]) +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SEC1Ev_l192.omp_outlined +// OMP-DEfAULT-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR4]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[I:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// OMP-DEfAULT-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP-DEfAULT-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// OMP-DEfAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], 
align 4 +// OMP-DEfAULT-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// OMP-DEfAULT-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// OMP-DEfAULT: cond.true: +// OMP-DEfAULT-NEXT: br label [[COND_END:%.*]] +// OMP-DEfAULT: cond.false: +// OMP-DEfAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: br label [[COND_END]] +// OMP-DEfAULT: cond.end: +// OMP-DEfAULT-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// OMP-DEfAULT-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// OMP-DEfAULT: omp.inner.for.cond: +// OMP-DEfAULT-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// OMP-DEfAULT-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// OMP-DEfAULT: omp.inner.for.body: +// OMP-DEfAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// OMP-DEfAULT-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// OMP-DEfAULT-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// OMP-DEfAULT-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 14 +// OMP-DEfAULT-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// OMP-DEfAULT: omp.body.continue: +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// OMP-DEfAULT: omp.inner.for.inc: +// OMP-DEfAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// OMP-DEfAULT-NEXT: store i32 [[ADD3]], ptr 
[[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_COND]] +// OMP-DEfAULT: omp.inner.for.end: +// OMP-DEfAULT-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// OMP-DEfAULT: omp.loop.exit: +// OMP-DEfAULT-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_ZN2SED2Ev +// OMP-DEfAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(256) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 +// OMP-DEfAULT-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 +// OMP-DEfAULT-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 +// OMP-DEfAULT-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// OMP-DEfAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// OMP-DEfAULT-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store i32 [[TMP3]], ptr [[TMP5]], align 4 +// OMP-DEfAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], 
ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store ptr null, ptr [[TMP6]], align 4 +// OMP-DEfAULT-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store i32 2, ptr [[TMP9]], align 4 +// OMP-DEfAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// OMP-DEfAULT-NEXT: store i32 1, ptr [[TMP10]], align 4 +// OMP-DEfAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// OMP-DEfAULT-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 +// OMP-DEfAULT-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// OMP-DEfAULT-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 4 +// OMP-DEfAULT-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// OMP-DEfAULT-NEXT: store ptr @.offload_sizes.11, ptr [[TMP13]], align 4 +// OMP-DEfAULT-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// OMP-DEfAULT-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP14]], align 4 +// OMP-DEfAULT-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// OMP-DEfAULT-NEXT: store ptr null, ptr [[TMP15]], align 4 +// OMP-DEfAULT-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// OMP-DEfAULT-NEXT: store ptr null, ptr [[TMP16]], align 4 +// OMP-DEfAULT-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], 
i32 0, i32 8 +// OMP-DEfAULT-NEXT: store i64 0, ptr [[TMP17]], align 8 +// OMP-DEfAULT-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// OMP-DEfAULT-NEXT: store i64 0, ptr [[TMP18]], align 8 +// OMP-DEfAULT-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// OMP-DEfAULT-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP19]], align 4 +// OMP-DEfAULT-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// OMP-DEfAULT-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP20]], align 4 +// OMP-DEfAULT-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[TMP21]], align 4 +// OMP-DEfAULT-NEXT: [[TMP22:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SED1Ev_l199.region_id, ptr [[KERNEL_ARGS]]) +// OMP-DEfAULT-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// OMP-DEfAULT-NEXT: br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// OMP-DEfAULT: omp_offload.failed: +// OMP-DEfAULT-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SED1Ev_l199(i32 [[TMP3]]) #[[ATTR2]] +// OMP-DEfAULT-NEXT: br label [[OMP_OFFLOAD_CONT]] +// OMP-DEfAULT: omp_offload.cont: +// OMP-DEfAULT-NEXT: [[TMP24:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP25:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP24]], ptr [[TMP25]], align 4 +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SED1Ev_l199 +// OMP-DEfAULT-SAME: (i32 noundef [[A:%.*]]) #[[ATTR3]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[A_CASTED:%.*]] = 
alloca i32, align 4 +// OMP-DEfAULT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SED1Ev_l199.omp_outlined, i32 [[TMP1]]) +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SED1Ev_l199.omp_outlined +// OMP-DEfAULT-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR4]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[I:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// OMP-DEfAULT-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP-DEfAULT-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// OMP-DEfAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// OMP-DEfAULT-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// OMP-DEfAULT: cond.true: +// OMP-DEfAULT-NEXT: br label [[COND_END:%.*]] +// OMP-DEfAULT: cond.false: +// OMP-DEfAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: br label [[COND_END]] +// OMP-DEfAULT: cond.end: +// OMP-DEfAULT-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// OMP-DEfAULT-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// OMP-DEfAULT: omp.inner.for.cond: +// OMP-DEfAULT-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// OMP-DEfAULT-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// OMP-DEfAULT: omp.inner.for.body: +// OMP-DEfAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// OMP-DEfAULT-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// OMP-DEfAULT-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// OMP-DEfAULT-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 15 +// OMP-DEfAULT-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// 
OMP-DEfAULT-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// OMP-DEfAULT: omp.body.continue: +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// OMP-DEfAULT: omp.inner.for.inc: +// OMP-DEfAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// OMP-DEfAULT-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_COND]] +// OMP-DEfAULT: omp.inner.for.end: +// OMP-DEfAULT-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// OMP-DEfAULT: omp.loop.exit: +// OMP-DEfAULT-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@__cxx_global_var_init.13 +// OMP-DEfAULT-SAME: () #[[ATTR0]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: call void @_ZN2STILi100EEC1Ev(ptr noundef nonnull align 4 dereferenceable(912) @t1) +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2STILi100EED1Ev, ptr @t1, ptr @__dso_handle) #[[ATTR2]] +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_ZN2STILi100EEC1Ev +// OMP-DEfAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(912) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: call void @_ZN2STILi100EEC2Ev(ptr noundef nonnull align 4 dereferenceable(912) [[THIS1]]) +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_ZN2STILi100EED1Ev +// OMP-DEfAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(912) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], 
align 4 +// OMP-DEfAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: call void @_ZN2STILi100EED2Ev(ptr noundef nonnull align 4 dereferenceable(912) [[THIS1]]) #[[ATTR2]] +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_ZN2STILi100EEC2Ev +// OMP-DEfAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(912) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 +// OMP-DEfAULT-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 +// OMP-DEfAULT-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 +// OMP-DEfAULT-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// OMP-DEfAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// OMP-DEfAULT-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store i32 [[TMP3]], ptr [[TMP5]], align 4 +// OMP-DEfAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store 
ptr null, ptr [[TMP6]], align 4 +// OMP-DEfAULT-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store i32 2, ptr [[TMP9]], align 4 +// OMP-DEfAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// OMP-DEfAULT-NEXT: store i32 1, ptr [[TMP10]], align 4 +// OMP-DEfAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// OMP-DEfAULT-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 +// OMP-DEfAULT-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// OMP-DEfAULT-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 4 +// OMP-DEfAULT-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// OMP-DEfAULT-NEXT: store ptr @.offload_sizes.14, ptr [[TMP13]], align 4 +// OMP-DEfAULT-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// OMP-DEfAULT-NEXT: store ptr @.offload_maptypes.15, ptr [[TMP14]], align 4 +// OMP-DEfAULT-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// OMP-DEfAULT-NEXT: store ptr null, ptr [[TMP15]], align 4 +// OMP-DEfAULT-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// OMP-DEfAULT-NEXT: store ptr null, ptr [[TMP16]], align 4 +// OMP-DEfAULT-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// OMP-DEfAULT-NEXT: store i64 0, ptr [[TMP17]], align 
8 +// OMP-DEfAULT-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// OMP-DEfAULT-NEXT: store i64 0, ptr [[TMP18]], align 8 +// OMP-DEfAULT-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// OMP-DEfAULT-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP19]], align 4 +// OMP-DEfAULT-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// OMP-DEfAULT-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP20]], align 4 +// OMP-DEfAULT-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[TMP21]], align 4 +// OMP-DEfAULT-NEXT: [[TMP22:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EEC1Ev_l218.region_id, ptr [[KERNEL_ARGS]]) +// OMP-DEfAULT-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// OMP-DEfAULT-NEXT: br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// OMP-DEfAULT: omp_offload.failed: +// OMP-DEfAULT-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EEC1Ev_l218(i32 [[TMP3]]) #[[ATTR2]] +// OMP-DEfAULT-NEXT: br label [[OMP_OFFLOAD_CONT]] +// OMP-DEfAULT: omp_offload.cont: +// OMP-DEfAULT-NEXT: [[TMP24:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP25:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP24]], ptr [[TMP25]], align 4 +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EEC1Ev_l218 +// OMP-DEfAULT-SAME: (i32 noundef [[A:%.*]]) #[[ATTR3]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: 
store i32 [[A]], ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EEC1Ev_l218.omp_outlined, i32 [[TMP1]]) +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EEC1Ev_l218.omp_outlined +// OMP-DEfAULT-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR4]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[I:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// OMP-DEfAULT-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// 
OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP-DEfAULT-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// OMP-DEfAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// OMP-DEfAULT-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// OMP-DEfAULT: cond.true: +// OMP-DEfAULT-NEXT: br label [[COND_END:%.*]] +// OMP-DEfAULT: cond.false: +// OMP-DEfAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: br label [[COND_END]] +// OMP-DEfAULT: cond.end: +// OMP-DEfAULT-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// OMP-DEfAULT-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// OMP-DEfAULT: omp.inner.for.cond: +// OMP-DEfAULT-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// OMP-DEfAULT-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// OMP-DEfAULT: omp.inner.for.body: +// OMP-DEfAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// OMP-DEfAULT-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// OMP-DEfAULT-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// OMP-DEfAULT-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 117 +// OMP-DEfAULT-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] +// OMP-DEfAULT: omp.body.continue: +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// OMP-DEfAULT: omp.inner.for.inc: +// OMP-DEfAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// OMP-DEfAULT-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_COND]] +// OMP-DEfAULT: omp.inner.for.end: +// OMP-DEfAULT-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// OMP-DEfAULT: omp.loop.exit: +// OMP-DEfAULT-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_ZN2STILi100EED2Ev +// OMP-DEfAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(912) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 +// OMP-DEfAULT-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 +// OMP-DEfAULT-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 +// OMP-DEfAULT-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// OMP-DEfAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// OMP-DEfAULT-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store i32 [[TMP3]], ptr [[TMP5]], align 4 +// OMP-DEfAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store ptr null, ptr [[TMP6]], align 4 +// OMP-DEfAULT-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store i32 2, ptr [[TMP9]], align 4 +// OMP-DEfAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// OMP-DEfAULT-NEXT: store i32 1, ptr [[TMP10]], align 4 +// OMP-DEfAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// OMP-DEfAULT-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 +// OMP-DEfAULT-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// OMP-DEfAULT-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 4 +// OMP-DEfAULT-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// OMP-DEfAULT-NEXT: store ptr @.offload_sizes.16, ptr [[TMP13]], align 4 +// OMP-DEfAULT-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// OMP-DEfAULT-NEXT: store ptr @.offload_maptypes.17, ptr [[TMP14]], align 4 +// OMP-DEfAULT-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// OMP-DEfAULT-NEXT: 
store ptr null, ptr [[TMP15]], align 4 +// OMP-DEfAULT-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// OMP-DEfAULT-NEXT: store ptr null, ptr [[TMP16]], align 4 +// OMP-DEfAULT-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// OMP-DEfAULT-NEXT: store i64 0, ptr [[TMP17]], align 8 +// OMP-DEfAULT-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// OMP-DEfAULT-NEXT: store i64 0, ptr [[TMP18]], align 8 +// OMP-DEfAULT-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// OMP-DEfAULT-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP19]], align 4 +// OMP-DEfAULT-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// OMP-DEfAULT-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP20]], align 4 +// OMP-DEfAULT-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[TMP21]], align 4 +// OMP-DEfAULT-NEXT: [[TMP22:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EED1Ev_l225.region_id, ptr [[KERNEL_ARGS]]) +// OMP-DEfAULT-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// OMP-DEfAULT-NEXT: br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// OMP-DEfAULT: omp_offload.failed: +// OMP-DEfAULT-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EED1Ev_l225(i32 [[TMP3]]) #[[ATTR2]] +// OMP-DEfAULT-NEXT: br label [[OMP_OFFLOAD_CONT]] +// OMP-DEfAULT: omp_offload.cont: +// OMP-DEfAULT-NEXT: [[TMP24:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP25:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: 
store i32 [[TMP24]], ptr [[TMP25]], align 4 +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EED1Ev_l225 +// OMP-DEfAULT-SAME: (i32 noundef [[A:%.*]]) #[[ATTR3]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EED1Ev_l225.omp_outlined, i32 [[TMP1]]) +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EED1Ev_l225.omp_outlined +// OMP-DEfAULT-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR4]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[I:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 
4 +// OMP-DEfAULT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// OMP-DEfAULT-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP-DEfAULT-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// OMP-DEfAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// OMP-DEfAULT-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// OMP-DEfAULT: cond.true: +// OMP-DEfAULT-NEXT: br label [[COND_END:%.*]] +// OMP-DEfAULT: cond.false: +// OMP-DEfAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: br label [[COND_END]] +// OMP-DEfAULT: cond.end: +// OMP-DEfAULT-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// OMP-DEfAULT-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// OMP-DEfAULT: omp.inner.for.cond: +// OMP-DEfAULT-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// OMP-DEfAULT-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// OMP-DEfAULT: omp.inner.for.body: +// OMP-DEfAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// OMP-DEfAULT-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// OMP-DEfAULT-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// OMP-DEfAULT-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 118 +// OMP-DEfAULT-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// OMP-DEfAULT: omp.body.continue: +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// OMP-DEfAULT: omp.inner.for.inc: +// OMP-DEfAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// OMP-DEfAULT-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_COND]] +// OMP-DEfAULT: omp.inner.for.end: +// OMP-DEfAULT-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// OMP-DEfAULT: omp.loop.exit: +// OMP-DEfAULT-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@__cxx_global_var_init.18 +// OMP-DEfAULT-SAME: () #[[ATTR0]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: call void @_ZN2STILi1000EEC1Ev(ptr noundef nonnull align 4 dereferenceable(4512) @t2) +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2STILi1000EED1Ev, ptr @t2, ptr @__dso_handle) #[[ATTR2]] +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_ZN2STILi1000EEC1Ev +// OMP-DEfAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(4512) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: call void @_ZN2STILi1000EEC2Ev(ptr noundef nonnull align 4 dereferenceable(4512) [[THIS1]]) +// 
OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_ZN2STILi1000EED1Ev +// OMP-DEfAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(4512) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: call void @_ZN2STILi1000EED2Ev(ptr noundef nonnull align 4 dereferenceable(4512) [[THIS1]]) #[[ATTR2]] +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_ZN2STILi1000EEC2Ev +// OMP-DEfAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(4512) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 +// OMP-DEfAULT-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 +// OMP-DEfAULT-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 +// OMP-DEfAULT-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// OMP-DEfAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 
+// OMP-DEfAULT-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// OMP-DEfAULT-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store i32 [[TMP3]], ptr [[TMP5]], align 4 +// OMP-DEfAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store ptr null, ptr [[TMP6]], align 4 +// OMP-DEfAULT-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store i32 2, ptr [[TMP9]], align 4 +// OMP-DEfAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// OMP-DEfAULT-NEXT: store i32 1, ptr [[TMP10]], align 4 +// OMP-DEfAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// OMP-DEfAULT-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 +// OMP-DEfAULT-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// OMP-DEfAULT-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 4 +// OMP-DEfAULT-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// OMP-DEfAULT-NEXT: store ptr @.offload_sizes.19, ptr [[TMP13]], align 4 +// OMP-DEfAULT-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// OMP-DEfAULT-NEXT: store ptr @.offload_maptypes.20, ptr [[TMP14]], align 4 +// OMP-DEfAULT-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// OMP-DEfAULT-NEXT: store ptr null, ptr [[TMP15]], align 4 
+// OMP-DEfAULT-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// OMP-DEfAULT-NEXT: store ptr null, ptr [[TMP16]], align 4 +// OMP-DEfAULT-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// OMP-DEfAULT-NEXT: store i64 0, ptr [[TMP17]], align 8 +// OMP-DEfAULT-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// OMP-DEfAULT-NEXT: store i64 0, ptr [[TMP18]], align 8 +// OMP-DEfAULT-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// OMP-DEfAULT-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP19]], align 4 +// OMP-DEfAULT-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// OMP-DEfAULT-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP20]], align 4 +// OMP-DEfAULT-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[TMP21]], align 4 +// OMP-DEfAULT-NEXT: [[TMP22:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EEC1Ev_l218.region_id, ptr [[KERNEL_ARGS]]) +// OMP-DEfAULT-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// OMP-DEfAULT-NEXT: br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// OMP-DEfAULT: omp_offload.failed: +// OMP-DEfAULT-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EEC1Ev_l218(i32 [[TMP3]]) #[[ATTR2]] +// OMP-DEfAULT-NEXT: br label [[OMP_OFFLOAD_CONT]] +// OMP-DEfAULT: omp_offload.cont: +// OMP-DEfAULT-NEXT: [[TMP24:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP25:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP24]], ptr [[TMP25]], align 
4 +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EEC1Ev_l218 +// OMP-DEfAULT-SAME: (i32 noundef [[A:%.*]]) #[[ATTR3]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EEC1Ev_l218.omp_outlined, i32 [[TMP1]]) +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EEC1Ev_l218.omp_outlined +// OMP-DEfAULT-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR4]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[I:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 
[[A]], ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// OMP-DEfAULT-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP-DEfAULT-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// OMP-DEfAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// OMP-DEfAULT-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// OMP-DEfAULT: cond.true: +// OMP-DEfAULT-NEXT: br label [[COND_END:%.*]] +// OMP-DEfAULT: cond.false: +// OMP-DEfAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: br label [[COND_END]] +// OMP-DEfAULT: cond.end: +// OMP-DEfAULT-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// OMP-DEfAULT-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// OMP-DEfAULT: omp.inner.for.cond: +// OMP-DEfAULT-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// OMP-DEfAULT-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// OMP-DEfAULT: omp.inner.for.body: +// OMP-DEfAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[MUL:%.*]] = mul nsw i32 
[[TMP7]], 1 +// OMP-DEfAULT-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// OMP-DEfAULT-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// OMP-DEfAULT-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1017 +// OMP-DEfAULT-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// OMP-DEfAULT: omp.body.continue: +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// OMP-DEfAULT: omp.inner.for.inc: +// OMP-DEfAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// OMP-DEfAULT-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_COND]] +// OMP-DEfAULT: omp.inner.for.end: +// OMP-DEfAULT-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// OMP-DEfAULT: omp.loop.exit: +// OMP-DEfAULT-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_ZN2STILi1000EED2Ev +// OMP-DEfAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(4512) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 +// OMP-DEfAULT-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 +// OMP-DEfAULT-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 +// OMP-DEfAULT-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// OMP-DEfAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, 
ptr [[TMP0]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// OMP-DEfAULT-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store i32 [[TMP3]], ptr [[TMP5]], align 4 +// OMP-DEfAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store ptr null, ptr [[TMP6]], align 4 +// OMP-DEfAULT-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store i32 2, ptr [[TMP9]], align 4 +// OMP-DEfAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// OMP-DEfAULT-NEXT: store i32 1, ptr [[TMP10]], align 4 +// OMP-DEfAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// OMP-DEfAULT-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 +// OMP-DEfAULT-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// OMP-DEfAULT-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 4 +// OMP-DEfAULT-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// OMP-DEfAULT-NEXT: store ptr @.offload_sizes.21, ptr [[TMP13]], 
align 4 +// OMP-DEfAULT-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// OMP-DEfAULT-NEXT: store ptr @.offload_maptypes.22, ptr [[TMP14]], align 4 +// OMP-DEfAULT-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// OMP-DEfAULT-NEXT: store ptr null, ptr [[TMP15]], align 4 +// OMP-DEfAULT-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// OMP-DEfAULT-NEXT: store ptr null, ptr [[TMP16]], align 4 +// OMP-DEfAULT-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// OMP-DEfAULT-NEXT: store i64 0, ptr [[TMP17]], align 8 +// OMP-DEfAULT-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// OMP-DEfAULT-NEXT: store i64 0, ptr [[TMP18]], align 8 +// OMP-DEfAULT-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// OMP-DEfAULT-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP19]], align 4 +// OMP-DEfAULT-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// OMP-DEfAULT-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP20]], align 4 +// OMP-DEfAULT-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[TMP21]], align 4 +// OMP-DEfAULT-NEXT: [[TMP22:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EED1Ev_l225.region_id, ptr [[KERNEL_ARGS]]) +// OMP-DEfAULT-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// OMP-DEfAULT-NEXT: br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// OMP-DEfAULT: 
omp_offload.failed: +// OMP-DEfAULT-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EED1Ev_l225(i32 [[TMP3]]) #[[ATTR2]] +// OMP-DEfAULT-NEXT: br label [[OMP_OFFLOAD_CONT]] +// OMP-DEfAULT: omp_offload.cont: +// OMP-DEfAULT-NEXT: [[TMP24:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP25:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP24]], ptr [[TMP25]], align 4 +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EED1Ev_l225 +// OMP-DEfAULT-SAME: (i32 noundef [[A:%.*]]) #[[ATTR3]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EED1Ev_l225.omp_outlined, i32 [[TMP1]]) +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EED1Ev_l225.omp_outlined +// OMP-DEfAULT-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR4]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[I:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// OMP-DEfAULT-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP-DEfAULT-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// OMP-DEfAULT-NEXT: [[TMP2:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// OMP-DEfAULT-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// OMP-DEfAULT: cond.true: +// OMP-DEfAULT-NEXT: br label [[COND_END:%.*]] +// OMP-DEfAULT: cond.false: +// OMP-DEfAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: br label [[COND_END]] +// OMP-DEfAULT: cond.end: +// OMP-DEfAULT-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// OMP-DEfAULT-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// OMP-DEfAULT: omp.inner.for.cond: +// OMP-DEfAULT-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// OMP-DEfAULT-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// OMP-DEfAULT: omp.inner.for.body: +// OMP-DEfAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// OMP-DEfAULT-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// OMP-DEfAULT-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// OMP-DEfAULT-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1018 +// OMP-DEfAULT-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// OMP-DEfAULT: omp.body.continue: +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// OMP-DEfAULT: omp.inner.for.inc: +// OMP-DEfAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// OMP-DEfAULT-NEXT: store i32 [[ADD3]], 
ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_COND]] +// OMP-DEfAULT: omp.inner.for.end: +// OMP-DEfAULT-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// OMP-DEfAULT: omp.loop.exit: +// OMP-DEfAULT-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_Z3bari +// OMP-DEfAULT-SAME: (i32 noundef [[A:%.*]]) #[[ATTR5:[0-9]+]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[R:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[R_CASTED:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 +// OMP-DEfAULT-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 +// OMP-DEfAULT-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 +// OMP-DEfAULT-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// OMP-DEfAULT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP0]], ptr [[R]], align 4 +// OMP-DEfAULT-NEXT: call void @_ZN2SA3fooEv(ptr noundef nonnull align 4 dereferenceable(16) @_ZL2a1) +// OMP-DEfAULT-NEXT: call void @_ZN2SA3fooEv(ptr noundef nonnull align 4 dereferenceable(16) @a2) +// OMP-DEfAULT-NEXT: call void @_ZN2SB3fooEv(ptr noundef nonnull align 4 dereferenceable(32) @b1) +// OMP-DEfAULT-NEXT: call void @_ZN2SB3fooEv(ptr noundef nonnull align 4 dereferenceable(32) @b2) +// OMP-DEfAULT-NEXT: call void @_ZN2SC3fooEv(ptr noundef nonnull align 4 dereferenceable(64) @_ZL2c1) +// OMP-DEfAULT-NEXT: call void @_ZN2SD3fooEv(ptr noundef nonnull align 4 dereferenceable(128) @d1) +// OMP-DEfAULT-NEXT: call void @_ZN2SE3fooEv(ptr noundef nonnull align 4 dereferenceable(256) @e1) +// OMP-DEfAULT-NEXT: call void @_ZN2STILi100EE3fooEv(ptr noundef nonnull align 4 dereferenceable(912) @t1) +// OMP-DEfAULT-NEXT: 
call void @_ZN2STILi1000EE3fooEv(ptr noundef nonnull align 4 dereferenceable(4512) @t2) +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[R]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP1]], ptr [[R_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[R_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4 +// OMP-DEfAULT-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store i32 [[TMP2]], ptr [[TMP4]], align 4 +// OMP-DEfAULT-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store ptr null, ptr [[TMP5]], align 4 +// OMP-DEfAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store i32 2, ptr [[TMP8]], align 4 +// OMP-DEfAULT-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// OMP-DEfAULT-NEXT: store i32 1, ptr [[TMP9]], align 4 +// OMP-DEfAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// OMP-DEfAULT-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 +// OMP-DEfAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// OMP-DEfAULT-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 +// OMP-DEfAULT-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// OMP-DEfAULT-NEXT: store ptr @.offload_sizes.23, ptr [[TMP12]], 
align 4 +// OMP-DEfAULT-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// OMP-DEfAULT-NEXT: store ptr @.offload_maptypes.24, ptr [[TMP13]], align 4 +// OMP-DEfAULT-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// OMP-DEfAULT-NEXT: store ptr null, ptr [[TMP14]], align 4 +// OMP-DEfAULT-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// OMP-DEfAULT-NEXT: store ptr null, ptr [[TMP15]], align 4 +// OMP-DEfAULT-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// OMP-DEfAULT-NEXT: store i64 0, ptr [[TMP16]], align 8 +// OMP-DEfAULT-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// OMP-DEfAULT-NEXT: store i64 0, ptr [[TMP17]], align 8 +// OMP-DEfAULT-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// OMP-DEfAULT-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP18]], align 4 +// OMP-DEfAULT-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// OMP-DEfAULT-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP19]], align 4 +// OMP-DEfAULT-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[TMP20]], align 4 +// OMP-DEfAULT-NEXT: [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3bari_l267.region_id, ptr [[KERNEL_ARGS]]) +// OMP-DEfAULT-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// OMP-DEfAULT-NEXT: br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// OMP-DEfAULT: omp_offload.failed: +// 
OMP-DEfAULT-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3bari_l267(i32 [[TMP2]]) #[[ATTR2]] +// OMP-DEfAULT-NEXT: br label [[OMP_OFFLOAD_CONT]] +// OMP-DEfAULT: omp_offload.cont: +// OMP-DEfAULT-NEXT: [[TMP23:%.*]] = load i32, ptr [[R]], align 4 +// OMP-DEfAULT-NEXT: [[TMP24:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// OMP-DEfAULT-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP25]] +// OMP-DEfAULT-NEXT: ret i32 [[ADD]] +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_ZN2SA3fooEv +// OMP-DEfAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(16) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 +// OMP-DEfAULT-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_ZN2SB3fooEv +// OMP-DEfAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(32) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] 
= alloca [1 x ptr], align 4 +// OMP-DEfAULT-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 +// OMP-DEfAULT-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 +// OMP-DEfAULT-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// OMP-DEfAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// OMP-DEfAULT-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store i32 [[TMP3]], ptr [[TMP5]], align 4 +// OMP-DEfAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store ptr null, ptr [[TMP6]], align 4 +// OMP-DEfAULT-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store i32 2, ptr [[TMP9]], align 4 +// OMP-DEfAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// OMP-DEfAULT-NEXT: store i32 1, ptr [[TMP10]], align 4 +// OMP-DEfAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// OMP-DEfAULT-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 +// OMP-DEfAULT-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// OMP-DEfAULT-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 4 +// OMP-DEfAULT-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// OMP-DEfAULT-NEXT: store ptr @.offload_sizes.25, ptr [[TMP13]], align 4 +// OMP-DEfAULT-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// OMP-DEfAULT-NEXT: store ptr @.offload_maptypes.26, ptr [[TMP14]], align 4 +// OMP-DEfAULT-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// OMP-DEfAULT-NEXT: store ptr null, ptr [[TMP15]], align 4 +// OMP-DEfAULT-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// OMP-DEfAULT-NEXT: store ptr null, ptr [[TMP16]], align 4 +// OMP-DEfAULT-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// OMP-DEfAULT-NEXT: store i64 0, ptr [[TMP17]], align 8 +// OMP-DEfAULT-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// OMP-DEfAULT-NEXT: store i64 0, ptr [[TMP18]], align 8 +// OMP-DEfAULT-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// OMP-DEfAULT-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP19]], align 4 +// OMP-DEfAULT-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// OMP-DEfAULT-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP20]], align 4 +// OMP-DEfAULT-NEXT: [[TMP21:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[TMP21]], align 4 +// OMP-DEfAULT-NEXT: [[TMP22:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SB3fooEv_l122.region_id, ptr [[KERNEL_ARGS]]) +// OMP-DEfAULT-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// OMP-DEfAULT-NEXT: br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// OMP-DEfAULT: omp_offload.failed: +// OMP-DEfAULT-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SB3fooEv_l122(i32 [[TMP3]]) #[[ATTR2]] +// OMP-DEfAULT-NEXT: br label [[OMP_OFFLOAD_CONT]] +// OMP-DEfAULT: omp_offload.cont: +// OMP-DEfAULT-NEXT: [[TMP24:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP25:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP24]], ptr [[TMP25]], align 4 +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_ZN2SC3fooEv +// OMP-DEfAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(64) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 7 +// OMP-DEfAULT-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 
4 +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_ZN2SD3fooEv +// OMP-DEfAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(128) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 10 +// OMP-DEfAULT-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_ZN2SE3fooEv +// OMP-DEfAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(256) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP3:%.*]] = load i32, 
ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SE3fooEv_l185(i32 [[TMP3]]) #[[ATTR2]] +// OMP-DEfAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP5:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4 +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_ZN2STILi100EE3fooEv +// OMP-DEfAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(912) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 +// OMP-DEfAULT-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 +// OMP-DEfAULT-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 +// OMP-DEfAULT-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// OMP-DEfAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// OMP-DEfAULT-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store i32 [[TMP3]], ptr [[TMP5]], align 4 +// OMP-DEfAULT-NEXT: 
[[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store ptr null, ptr [[TMP6]], align 4 +// OMP-DEfAULT-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store i32 2, ptr [[TMP9]], align 4 +// OMP-DEfAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// OMP-DEfAULT-NEXT: store i32 1, ptr [[TMP10]], align 4 +// OMP-DEfAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// OMP-DEfAULT-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 +// OMP-DEfAULT-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// OMP-DEfAULT-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 4 +// OMP-DEfAULT-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// OMP-DEfAULT-NEXT: store ptr @.offload_sizes.27, ptr [[TMP13]], align 4 +// OMP-DEfAULT-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// OMP-DEfAULT-NEXT: store ptr @.offload_maptypes.28, ptr [[TMP14]], align 4 +// OMP-DEfAULT-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// OMP-DEfAULT-NEXT: store ptr null, ptr [[TMP15]], align 4 +// OMP-DEfAULT-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// OMP-DEfAULT-NEXT: store ptr null, ptr [[TMP16]], align 4 +// OMP-DEfAULT-NEXT: [[TMP17:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// OMP-DEfAULT-NEXT: store i64 0, ptr [[TMP17]], align 8 +// OMP-DEfAULT-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// OMP-DEfAULT-NEXT: store i64 0, ptr [[TMP18]], align 8 +// OMP-DEfAULT-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// OMP-DEfAULT-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP19]], align 4 +// OMP-DEfAULT-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// OMP-DEfAULT-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP20]], align 4 +// OMP-DEfAULT-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[TMP21]], align 4 +// OMP-DEfAULT-NEXT: [[TMP22:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EE3fooEv_l211.region_id, ptr [[KERNEL_ARGS]]) +// OMP-DEfAULT-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// OMP-DEfAULT-NEXT: br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// OMP-DEfAULT: omp_offload.failed: +// OMP-DEfAULT-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EE3fooEv_l211(i32 [[TMP3]]) #[[ATTR2]] +// OMP-DEfAULT-NEXT: br label [[OMP_OFFLOAD_CONT]] +// OMP-DEfAULT: omp_offload.cont: +// OMP-DEfAULT-NEXT: [[TMP24:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP25:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP24]], ptr [[TMP25]], align 4 +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_ZN2STILi1000EE3fooEv +// OMP-DEfAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(4512) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 { +// OMP-DEfAULT-NEXT: entry: 
+// OMP-DEfAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 +// OMP-DEfAULT-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 +// OMP-DEfAULT-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 +// OMP-DEfAULT-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// OMP-DEfAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// OMP-DEfAULT-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store i32 [[TMP3]], ptr [[TMP5]], align 4 +// OMP-DEfAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store ptr null, ptr [[TMP6]], align 4 +// OMP-DEfAULT-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// OMP-DEfAULT-NEXT: store i32 2, ptr [[TMP9]], align 4 +// OMP-DEfAULT-NEXT: 
[[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// OMP-DEfAULT-NEXT: store i32 1, ptr [[TMP10]], align 4 +// OMP-DEfAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// OMP-DEfAULT-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 +// OMP-DEfAULT-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// OMP-DEfAULT-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 4 +// OMP-DEfAULT-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// OMP-DEfAULT-NEXT: store ptr @.offload_sizes.29, ptr [[TMP13]], align 4 +// OMP-DEfAULT-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// OMP-DEfAULT-NEXT: store ptr @.offload_maptypes.30, ptr [[TMP14]], align 4 +// OMP-DEfAULT-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// OMP-DEfAULT-NEXT: store ptr null, ptr [[TMP15]], align 4 +// OMP-DEfAULT-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// OMP-DEfAULT-NEXT: store ptr null, ptr [[TMP16]], align 4 +// OMP-DEfAULT-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// OMP-DEfAULT-NEXT: store i64 0, ptr [[TMP17]], align 8 +// OMP-DEfAULT-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// OMP-DEfAULT-NEXT: store i64 0, ptr [[TMP18]], align 8 +// OMP-DEfAULT-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// OMP-DEfAULT-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP19]], align 4 +// OMP-DEfAULT-NEXT: [[TMP20:%.*]] = getelementptr 
inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// OMP-DEfAULT-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP20]], align 4 +// OMP-DEfAULT-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[TMP21]], align 4 +// OMP-DEfAULT-NEXT: [[TMP22:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EE3fooEv_l211.region_id, ptr [[KERNEL_ARGS]]) +// OMP-DEfAULT-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// OMP-DEfAULT-NEXT: br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// OMP-DEfAULT: omp_offload.failed: +// OMP-DEfAULT-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EE3fooEv_l211(i32 [[TMP3]]) #[[ATTR2]] +// OMP-DEfAULT-NEXT: br label [[OMP_OFFLOAD_CONT]] +// OMP-DEfAULT: omp_offload.cont: +// OMP-DEfAULT-NEXT: [[TMP24:%.*]] = load i32, ptr [[A]], align 4 +// OMP-DEfAULT-NEXT: [[TMP25:%.*]] = load ptr, ptr @R, align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP24]], ptr [[TMP25]], align 4 +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3bari_l267 +// OMP-DEfAULT-SAME: (i32 noundef [[R:%.*]]) #[[ATTR3]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[R_ADDR:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[R_CASTED:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: store i32 [[R]], ptr [[R_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load i32, ptr [[R_ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP0]], ptr [[R_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[R_CASTED]], align 4 +// OMP-DEfAULT-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3bari_l267.omp_outlined, i32 [[TMP1]]) +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3bari_l267.omp_outlined +// OMP-DEfAULT-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[R:%.*]]) #[[ATTR4]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[R_ADDR:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[I:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[R]], ptr [[R_ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// OMP-DEfAULT-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP-DEfAULT-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// OMP-DEfAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 
+// OMP-DEfAULT-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// OMP-DEfAULT-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// OMP-DEfAULT: cond.true: +// OMP-DEfAULT-NEXT: br label [[COND_END:%.*]] +// OMP-DEfAULT: cond.false: +// OMP-DEfAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: br label [[COND_END]] +// OMP-DEfAULT: cond.end: +// OMP-DEfAULT-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// OMP-DEfAULT-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// OMP-DEfAULT: omp.inner.for.cond: +// OMP-DEfAULT-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// OMP-DEfAULT-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// OMP-DEfAULT: omp.inner.for.body: +// OMP-DEfAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// OMP-DEfAULT-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// OMP-DEfAULT-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// OMP-DEfAULT-NEXT: [[TMP8:%.*]] = load i32, ptr [[R_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 +// OMP-DEfAULT-NEXT: store i32 [[INC]], ptr [[R_ADDR]], align 4 +// OMP-DEfAULT-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// OMP-DEfAULT: omp.body.continue: +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// OMP-DEfAULT: omp.inner.for.inc: +// OMP-DEfAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 +// OMP-DEfAULT-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 
+// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_COND]] +// OMP-DEfAULT: omp.inner.for.end: +// OMP-DEfAULT-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// OMP-DEfAULT: omp.loop.exit: +// OMP-DEfAULT-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SB3fooEv_l122 +// OMP-DEfAULT-SAME: (i32 noundef [[A:%.*]]) #[[ATTR3]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SB3fooEv_l122.omp_outlined, i32 [[TMP1]]) +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SB3fooEv_l122.omp_outlined +// OMP-DEfAULT-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR4]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: 
[[I:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// OMP-DEfAULT-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP-DEfAULT-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// OMP-DEfAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// OMP-DEfAULT-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// OMP-DEfAULT: cond.true: +// OMP-DEfAULT-NEXT: br label [[COND_END:%.*]] +// OMP-DEfAULT: cond.false: +// OMP-DEfAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: br label [[COND_END]] +// OMP-DEfAULT: cond.end: +// OMP-DEfAULT-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// OMP-DEfAULT-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// OMP-DEfAULT: omp.inner.for.cond: +// OMP-DEfAULT-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// OMP-DEfAULT-NEXT: br i1 
[[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// OMP-DEfAULT: omp.inner.for.body: +// OMP-DEfAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// OMP-DEfAULT-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// OMP-DEfAULT-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// OMP-DEfAULT-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 4 +// OMP-DEfAULT-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// OMP-DEfAULT: omp.body.continue: +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// OMP-DEfAULT: omp.inner.for.inc: +// OMP-DEfAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// OMP-DEfAULT-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_COND]] +// OMP-DEfAULT: omp.inner.for.end: +// OMP-DEfAULT-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// OMP-DEfAULT: omp.loop.exit: +// OMP-DEfAULT-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SE3fooEv_l185 +// OMP-DEfAULT-SAME: (i32 noundef [[A:%.*]]) #[[ATTR4]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SE3fooEv_l185.omp_outlined, i32 [[TMP1]]) +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SE3fooEv_l185.omp_outlined +// OMP-DEfAULT-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR4]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[I:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// OMP-DEfAULT-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP-DEfAULT-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// OMP-DEfAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], 
align 4 +// OMP-DEfAULT-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// OMP-DEfAULT-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// OMP-DEfAULT: cond.true: +// OMP-DEfAULT-NEXT: br label [[COND_END:%.*]] +// OMP-DEfAULT: cond.false: +// OMP-DEfAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: br label [[COND_END]] +// OMP-DEfAULT: cond.end: +// OMP-DEfAULT-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// OMP-DEfAULT-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// OMP-DEfAULT: omp.inner.for.cond: +// OMP-DEfAULT-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// OMP-DEfAULT-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// OMP-DEfAULT: omp.inner.for.body: +// OMP-DEfAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// OMP-DEfAULT-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// OMP-DEfAULT-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// OMP-DEfAULT-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 13 +// OMP-DEfAULT-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// OMP-DEfAULT: omp.body.continue: +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// OMP-DEfAULT: omp.inner.for.inc: +// OMP-DEfAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// OMP-DEfAULT-NEXT: store i32 [[ADD3]], ptr 
[[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_COND]] +// OMP-DEfAULT: omp.inner.for.end: +// OMP-DEfAULT-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// OMP-DEfAULT: omp.loop.exit: +// OMP-DEfAULT-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EE3fooEv_l211 +// OMP-DEfAULT-SAME: (i32 noundef [[A:%.*]]) #[[ATTR3]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EE3fooEv_l211.omp_outlined, i32 [[TMP1]]) +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EE3fooEv_l211.omp_outlined +// OMP-DEfAULT-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR4]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_IS_LAST:%.*]] = 
alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[I:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// OMP-DEfAULT-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP-DEfAULT-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// OMP-DEfAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// OMP-DEfAULT-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// OMP-DEfAULT: cond.true: +// OMP-DEfAULT-NEXT: br label [[COND_END:%.*]] +// OMP-DEfAULT: cond.false: +// OMP-DEfAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: br label [[COND_END]] +// OMP-DEfAULT: cond.end: +// OMP-DEfAULT-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// OMP-DEfAULT-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// OMP-DEfAULT: omp.inner.for.cond: +// OMP-DEfAULT-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: [[CMP1:%.*]] = icmp sle i32 
[[TMP5]], [[TMP6]] +// OMP-DEfAULT-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// OMP-DEfAULT: omp.inner.for.body: +// OMP-DEfAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// OMP-DEfAULT-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// OMP-DEfAULT-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// OMP-DEfAULT-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 116 +// OMP-DEfAULT-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// OMP-DEfAULT: omp.body.continue: +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// OMP-DEfAULT: omp.inner.for.inc: +// OMP-DEfAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// OMP-DEfAULT-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_COND]] +// OMP-DEfAULT: omp.inner.for.end: +// OMP-DEfAULT-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// OMP-DEfAULT: omp.loop.exit: +// OMP-DEfAULT-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EE3fooEv_l211 +// OMP-DEfAULT-SAME: (i32 noundef [[A:%.*]]) #[[ATTR3]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// OMP-DEfAULT-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EE3fooEv_l211.omp_outlined, i32 [[TMP1]]) +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EE3fooEv_l211.omp_outlined +// OMP-DEfAULT-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR4]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// OMP-DEfAULT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: [[I:%.*]] = alloca i32, align 4 +// OMP-DEfAULT-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// OMP-DEfAULT-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// OMP-DEfAULT-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// OMP-DEfAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// OMP-DEfAULT-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// OMP-DEfAULT-NEXT: [[TMP2:%.*]] = load i32, 
ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// OMP-DEfAULT-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// OMP-DEfAULT: cond.true: +// OMP-DEfAULT-NEXT: br label [[COND_END:%.*]] +// OMP-DEfAULT: cond.false: +// OMP-DEfAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: br label [[COND_END]] +// OMP-DEfAULT: cond.end: +// OMP-DEfAULT-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// OMP-DEfAULT-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// OMP-DEfAULT-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// OMP-DEfAULT: omp.inner.for.cond: +// OMP-DEfAULT-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-DEfAULT-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// OMP-DEfAULT-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// OMP-DEfAULT: omp.inner.for.body: +// OMP-DEfAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// OMP-DEfAULT-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// OMP-DEfAULT-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// OMP-DEfAULT-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1016 +// OMP-DEfAULT-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// OMP-DEfAULT-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// OMP-DEfAULT: omp.body.continue: +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// OMP-DEfAULT: omp.inner.for.inc: +// OMP-DEfAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// OMP-DEfAULT-NEXT: store i32 
[[ADD3]], ptr [[DOTOMP_IV]], align 4 +// OMP-DEfAULT-NEXT: br label [[OMP_INNER_FOR_COND]] +// OMP-DEfAULT: omp.inner.for.end: +// OMP-DEfAULT-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// OMP-DEfAULT: omp.loop.exit: +// OMP-DEfAULT-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_GLOBAL__I_000500 +// OMP-DEfAULT-SAME: () #[[ATTR0]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: call void @__cxx_global_var_init() +// OMP-DEfAULT-NEXT: call void @__cxx_global_var_init.2() +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_GLOBAL__I_000501 +// OMP-DEfAULT-SAME: () #[[ATTR0]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: call void @__cxx_global_var_init.3() +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_target_parallel_generic_loop_codegen_1.cpp +// OMP-DEfAULT-SAME: () #[[ATTR0]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: call void @__cxx_global_var_init.1() +// OMP-DEfAULT-NEXT: call void @__cxx_global_var_init.4() +// OMP-DEfAULT-NEXT: call void @__cxx_global_var_init.5() +// OMP-DEfAULT-NEXT: call void @__cxx_global_var_init.8() +// OMP-DEfAULT-NEXT: call void @__cxx_global_var_init.13() +// OMP-DEfAULT-NEXT: call void @__cxx_global_var_init.18() +// OMP-DEfAULT-NEXT: ret void +// +// +// OMP-DEfAULT-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// OMP-DEfAULT-SAME: () #[[ATTR0]] { +// OMP-DEfAULT-NEXT: entry: +// OMP-DEfAULT-NEXT: call void @__tgt_register_requires(i64 1) +// OMP-DEfAULT-NEXT: ret void +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// 
+// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@__cxx_global_var_init +// CHECK-NTARGET-OMP-DEFAULT-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @_ZN2SAC1Ev(ptr noundef nonnull align 4 dereferenceable(16) @_ZL2a1) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SAD1Ev, ptr @_ZL2a1, ptr @__dso_handle) #[[ATTR2:[0-9]+]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_ZN2SAC1Ev +// 
CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @_ZN2SAC2Ev(ptr noundef nonnull align 4 dereferenceable(16) [[THIS1]]) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_ZN2SAD1Ev +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @_ZN2SAD2Ev(ptr noundef nonnull align 4 dereferenceable(16) [[THIS1]]) #[[ATTR2]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_ZN2SAC2Ev +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], 
align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 2 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_ZN2SAD2Ev +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 3 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@__cxx_global_var_init.1 +// 
CHECK-NTARGET-OMP-DEFAULT-SAME: () #[[ATTR0]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @_ZN2SAC1Ev(ptr noundef nonnull align 4 dereferenceable(16) @a2) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SAD1Ev, ptr @a2, ptr @__dso_handle) #[[ATTR2]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@__cxx_global_var_init.2 +// CHECK-NTARGET-OMP-DEFAULT-SAME: () #[[ATTR0]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @_ZN2SBC1Ev(ptr noundef nonnull align 4 dereferenceable(32) @b1) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SBD1Ev, ptr @b1, ptr @__dso_handle) #[[ATTR2]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_ZN2SBC1Ev +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(32) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @_ZN2SBC2Ev(ptr noundef nonnull align 4 dereferenceable(32) [[THIS1]]) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_ZN2SBD1Ev +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(32) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr 
[[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @_ZN2SBD2Ev(ptr noundef nonnull align 4 dereferenceable(32) [[THIS1]]) #[[ATTR2]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_ZN2SBC2Ev +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(32) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 5 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_ZN2SBD2Ev +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(32) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// 
CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 6 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@__cxx_global_var_init.3 +// CHECK-NTARGET-OMP-DEFAULT-SAME: () #[[ATTR0]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @_ZN2SBC1Ev(ptr noundef nonnull align 4 dereferenceable(32) @b2) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SBD1Ev, ptr @b2, ptr @__dso_handle) #[[ATTR2]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@__cxx_global_var_init.4 +// CHECK-NTARGET-OMP-DEFAULT-SAME: () #[[ATTR0]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @_ZN2SCC1Ev(ptr noundef nonnull align 4 dereferenceable(64) @_ZL2c1) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SCD1Ev, ptr @_ZL2c1, ptr @__dso_handle) #[[ATTR2]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_ZN2SCC1Ev +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(64) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// 
CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @_ZN2SCC2Ev(ptr noundef nonnull align 4 dereferenceable(64) [[THIS1]]) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_ZN2SCD1Ev +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(64) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @_ZN2SCD2Ev(ptr noundef nonnull align 4 dereferenceable(64) [[THIS1]]) #[[ATTR2]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_ZN2SCC2Ev +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(64) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP1]], ptr [[A]], 
align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP3:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SCC1Ev_l148(i64 [[TMP3]]) #[[ATTR2]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP5:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SCC1Ev_l148 +// CHECK-NTARGET-OMP-DEFAULT-SAME: (i64 noundef [[A:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SCC1Ev_l148.omp_outlined, i64 [[TMP1]]) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SCC1Ev_l148.omp_outlined +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// 
CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: cond.true: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[COND_END:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: cond.false: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[COND_END]] +// CHECK-NTARGET-OMP-DEFAULT: cond.end: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.cond: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.body: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// 
CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.body.continue: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.inc: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.end: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.loop.exit: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_ZN2SCD2Ev +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(64) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// 
CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 9 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@__cxx_global_var_init.5 +// CHECK-NTARGET-OMP-DEFAULT-SAME: () #[[ATTR0]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @_ZN2SDC1Ev(ptr noundef nonnull align 4 dereferenceable(128) @d1) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SDD1Ev, ptr @d1, ptr @__dso_handle) #[[ATTR2]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_ZN2SDC1Ev +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(128) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @_ZN2SDC2Ev(ptr noundef nonnull align 4 dereferenceable(128) [[THIS1]]) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_ZN2SDD1Ev +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(128) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// 
CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @_ZN2SDD2Ev(ptr noundef nonnull align 4 dereferenceable(128) [[THIS1]]) #[[ATTR2]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_ZN2SDC2Ev +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(128) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 11 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_ZN2SDD2Ev +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(128) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// 
CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP3:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SDD1Ev_l174(i64 [[TMP3]]) #[[ATTR2]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP5:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SDD1Ev_l174 +// CHECK-NTARGET-OMP-DEFAULT-SAME: (i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// 
CHECK-NTARGET-OMP-DEFAULT-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SDD1Ev_l174.omp_outlined, i64 [[TMP1]]) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SDD1Ev_l174.omp_outlined +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, 
ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: cond.true: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[COND_END:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: cond.false: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[COND_END]] +// CHECK-NTARGET-OMP-DEFAULT: cond.end: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.cond: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.body: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[MUL:%.*]] = mul nsw i32 
[[TMP7]], 1 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 12 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.body.continue: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.inc: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.end: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.loop.exit: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@__cxx_global_var_init.6 +// CHECK-NTARGET-OMP-DEFAULT-SAME: () #[[ATTR0]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @_ZN2SEC1Ev(ptr noundef nonnull align 4 dereferenceable(256) @e1) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2SED1Ev, ptr @e1, ptr @__dso_handle) #[[ATTR2]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_ZN2SEC1Ev +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(256) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// 
CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @_ZN2SEC2Ev(ptr noundef nonnull align 4 dereferenceable(256) [[THIS1]]) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_ZN2SED1Ev +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(256) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @_ZN2SED2Ev(ptr noundef nonnull align 4 dereferenceable(256) [[THIS1]]) #[[ATTR2]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_ZN2SEC2Ev +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(256) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// 
CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP3:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SEC1Ev_l192(i64 [[TMP3]]) #[[ATTR2]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP5:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SEC1Ev_l192 +// CHECK-NTARGET-OMP-DEFAULT-SAME: (i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SEC1Ev_l192.omp_outlined, i64 [[TMP1]]) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SEC1Ev_l192.omp_outlined +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// 
CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: cond.true: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[COND_END:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: cond.false: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[COND_END]] +// CHECK-NTARGET-OMP-DEFAULT: cond.end: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.cond: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.body: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: 
[[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 14 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.body.continue: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.inc: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.end: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.loop.exit: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_ZN2SED2Ev +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(256) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load 
i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP3:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SED1Ev_l199(i64 [[TMP3]]) #[[ATTR2]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP5:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SED1Ev_l199 +// CHECK-NTARGET-OMP-DEFAULT-SAME: (i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SED1Ev_l199.omp_outlined, i64 [[TMP1]]) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SED1Ev_l199.omp_outlined +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// 
CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: cond.true: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[COND_END:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: cond.false: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[COND_END]] +// CHECK-NTARGET-OMP-DEFAULT: cond.end: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.cond: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.body: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: 
[[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 15 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.body.continue: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.inc: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.end: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.loop.exit: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@__cxx_global_var_init.7 +// CHECK-NTARGET-OMP-DEFAULT-SAME: () #[[ATTR0]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @_ZN2STILi100EEC1Ev(ptr noundef nonnull align 4 dereferenceable(912) @t1) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2STILi100EED1Ev, ptr @t1, ptr @__dso_handle) #[[ATTR2]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_ZN2STILi100EEC1Ev +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(912) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// 
CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @_ZN2STILi100EEC2Ev(ptr noundef nonnull align 4 dereferenceable(912) [[THIS1]]) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_ZN2STILi100EED1Ev +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(912) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @_ZN2STILi100EED2Ev(ptr noundef nonnull align 4 dereferenceable(912) [[THIS1]]) #[[ATTR2]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_ZN2STILi100EEC2Ev +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(912) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 
4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP3:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EEC1Ev_l218(i64 [[TMP3]]) #[[ATTR2]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP5:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EEC1Ev_l218 +// CHECK-NTARGET-OMP-DEFAULT-SAME: (i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EEC1Ev_l218.omp_outlined, i64 [[TMP1]]) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EEC1Ev_l218.omp_outlined +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// 
CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: cond.true: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[COND_END:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: cond.false: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[COND_END]] +// CHECK-NTARGET-OMP-DEFAULT: cond.end: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.cond: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.body: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: 
[[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 117 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.body.continue: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.inc: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.end: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.loop.exit: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_ZN2STILi100EED2Ev +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(912) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] 
= load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP3:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EED1Ev_l225(i64 [[TMP3]]) #[[ATTR2]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP5:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EED1Ev_l225 +// CHECK-NTARGET-OMP-DEFAULT-SAME: (i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EED1Ev_l225.omp_outlined, i64 [[TMP1]]) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EED1Ev_l225.omp_outlined +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// 
CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: cond.true: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[COND_END:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: cond.false: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[COND_END]] +// CHECK-NTARGET-OMP-DEFAULT: cond.end: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.cond: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.body: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: 
[[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 118 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.body.continue: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.inc: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.end: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.loop.exit: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@__cxx_global_var_init.8 +// CHECK-NTARGET-OMP-DEFAULT-SAME: () #[[ATTR0]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @_ZN2STILi1000EEC1Ev(ptr noundef nonnull align 4 dereferenceable(4512) @t2) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2STILi1000EED1Ev, ptr @t2, ptr @__dso_handle) #[[ATTR2]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_ZN2STILi1000EEC1Ev +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(4512) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// 
CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @_ZN2STILi1000EEC2Ev(ptr noundef nonnull align 4 dereferenceable(4512) [[THIS1]]) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_ZN2STILi1000EED1Ev +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(4512) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @_ZN2STILi1000EED2Ev(ptr noundef nonnull align 4 dereferenceable(4512) [[THIS1]]) #[[ATTR2]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_ZN2STILi1000EEC2Ev +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(4512) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP1]], ptr 
[[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP3:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EEC1Ev_l218(i64 [[TMP3]]) #[[ATTR2]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP5:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EEC1Ev_l218 +// CHECK-NTARGET-OMP-DEFAULT-SAME: (i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EEC1Ev_l218.omp_outlined, i64 [[TMP1]]) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EEC1Ev_l218.omp_outlined +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// 
CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: cond.true: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[COND_END:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: cond.false: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[COND_END]] +// CHECK-NTARGET-OMP-DEFAULT: cond.end: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.cond: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.body: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: 
[[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1017 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.body.continue: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.inc: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.end: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.loop.exit: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_ZN2STILi1000EED2Ev +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(4512) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: 
[[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP3:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EED1Ev_l225(i64 [[TMP3]]) #[[ATTR2]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP5:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EED1Ev_l225 +// CHECK-NTARGET-OMP-DEFAULT-SAME: (i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EED1Ev_l225.omp_outlined, i64 [[TMP1]]) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EED1Ev_l225.omp_outlined +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// 
CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: cond.true: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[COND_END:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: cond.false: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[COND_END]] +// CHECK-NTARGET-OMP-DEFAULT: cond.end: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.cond: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.body: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: 
[[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1018 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.body.continue: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.inc: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.end: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.loop.exit: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_Z3bari +// CHECK-NTARGET-OMP-DEFAULT-SAME: (i32 noundef signext [[A:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[R:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[R_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP0]], ptr [[R]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @_ZN2SA3fooEv(ptr noundef nonnull align 4 dereferenceable(16) @_ZL2a1) +// 
CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @_ZN2SA3fooEv(ptr noundef nonnull align 4 dereferenceable(16) @a2) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @_ZN2SB3fooEv(ptr noundef nonnull align 4 dereferenceable(32) @b1) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @_ZN2SB3fooEv(ptr noundef nonnull align 4 dereferenceable(32) @b2) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @_ZN2SC3fooEv(ptr noundef nonnull align 4 dereferenceable(64) @_ZL2c1) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @_ZN2SD3fooEv(ptr noundef nonnull align 4 dereferenceable(128) @d1) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @_ZN2SE3fooEv(ptr noundef nonnull align 4 dereferenceable(256) @e1) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @_ZN2STILi100EE3fooEv(ptr noundef nonnull align 4 dereferenceable(912) @t1) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @_ZN2STILi1000EE3fooEv(ptr noundef nonnull align 4 dereferenceable(4512) @t2) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[R]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP1]], ptr [[R_CASTED]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP2:%.*]] = load i64, ptr [[R_CASTED]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3bari_l267(i64 [[TMP2]]) #[[ATTR2]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[R]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], [[TMP5]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret i32 [[ADD]] +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_ZN2SA3fooEv +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(16) [[THIS:%.*]]) #[[ATTR4]] comdat align 2 { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// 
CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_ZN2SB3fooEv +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(32) [[THIS:%.*]]) #[[ATTR4]] comdat align 2 { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP2]], ptr 
[[A_CASTED]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP3:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SB3fooEv_l122(i64 [[TMP3]]) #[[ATTR2]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP5:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_ZN2SC3fooEv +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(64) [[THIS:%.*]]) #[[ATTR4]] comdat align 2 { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 7 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_ZN2SD3fooEv +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(128) [[THIS:%.*]]) 
#[[ATTR4]] comdat align 2 { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 10 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP4:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_ZN2SE3fooEv +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(256) [[THIS:%.*]]) #[[ATTR4]] comdat align 2 { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP1]], ptr [[A]], 
align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP3:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SE3fooEv_l185(i64 [[TMP3]]) #[[ATTR2]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP5:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_ZN2STILi100EE3fooEv +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(912) [[THIS:%.*]]) #[[ATTR4]] comdat align 2 { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP3:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EE3fooEv_l211(i64 [[TMP3]]) #[[ATTR2]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP4:%.*]] = load i32, 
ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP5:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_ZN2STILi1000EE3fooEv +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noundef nonnull align 4 dereferenceable(4512) [[THIS:%.*]]) #[[ATTR4]] comdat align 2 { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP1]], ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP3:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EE3fooEv_l211(i64 [[TMP3]]) #[[ATTR2]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP5:%.*]] = load ptr, ptr @R, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3bari_l267 +// CHECK-NTARGET-OMP-DEFAULT-SAME: (i64 noundef [[R:%.*]]) #[[ATTR3]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: 
entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[R_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[R_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i64 [[R]], ptr [[R_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load i32, ptr [[R_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP0]], ptr [[R_CASTED]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i64, ptr [[R_CASTED]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3bari_l267.omp_outlined, i64 [[TMP1]]) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3bari_l267.omp_outlined +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[R:%.*]]) #[[ATTR3]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[R_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[DOTBOUND_TID_]], 
ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i64 [[R]], ptr [[R_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: cond.true: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[COND_END:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: cond.false: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[COND_END]] +// CHECK-NTARGET-OMP-DEFAULT: cond.end: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.cond: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// 
CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.body: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP8:%.*]] = load i32, ptr [[R_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[INC]], ptr [[R_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.body.continue: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.inc: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.end: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.loop.exit: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SB3fooEv_l122 +// CHECK-NTARGET-OMP-DEFAULT-SAME: (i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// 
CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SB3fooEv_l122.omp_outlined, i64 [[TMP1]]) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SB3fooEv_l122.omp_outlined +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: cond.true: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[COND_END:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: cond.false: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[COND_END]] +// CHECK-NTARGET-OMP-DEFAULT: cond.end: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.cond: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// 
CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.body: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.body.continue: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.inc: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.end: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.loop.exit: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SE3fooEv_l185 +// CHECK-NTARGET-OMP-DEFAULT-SAME: (i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// 
CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SE3fooEv_l185.omp_outlined, i64 [[TMP1]]) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SE3fooEv_l185.omp_outlined +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: cond.true: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[COND_END:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: cond.false: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[COND_END]] +// CHECK-NTARGET-OMP-DEFAULT: cond.end: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.cond: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// 
CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.body: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 13 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.body.continue: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.inc: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.end: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.loop.exit: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EE3fooEv_l211 +// CHECK-NTARGET-OMP-DEFAULT-SAME: (i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// 
CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EE3fooEv_l211.omp_outlined, i64 [[TMP1]]) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi100EE3fooEv_l211.omp_outlined +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr 
[[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: cond.true: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[COND_END:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: cond.false: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[COND_END]] +// CHECK-NTARGET-OMP-DEFAULT: cond.end: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.cond: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.body: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 116 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.body.continue: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.inc: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.end: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.loop.exit: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EE3fooEv_l211 +// CHECK-NTARGET-OMP-DEFAULT-SAME: (i64 noundef [[A:%.*]]) #[[ATTR3]] { +// 
CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EE3fooEv_l211.omp_outlined, i64 [[TMP1]]) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STILi1000EE3fooEv_l211.omp_outlined +// CHECK-NTARGET-OMP-DEFAULT-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// 
CHECK-NTARGET-OMP-DEFAULT-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: cond.true: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[COND_END:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: cond.false: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[COND_END]] +// CHECK-NTARGET-OMP-DEFAULT: cond.end: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.cond: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.body: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1016 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.body.continue: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.inc: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-NTARGET-OMP-DEFAULT: omp.inner.for.end: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-NTARGET-OMP-DEFAULT: omp.loop.exit: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_GLOBAL__I_000500 +// CHECK-NTARGET-OMP-DEFAULT-SAME: () #[[ATTR0]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// 
CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @__cxx_global_var_init() +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @__cxx_global_var_init.2() +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_GLOBAL__I_000501 +// CHECK-NTARGET-OMP-DEFAULT-SAME: () #[[ATTR0]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @__cxx_global_var_init.3() +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +// +// CHECK-NTARGET-OMP-DEFAULT-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_target_parallel_generic_loop_codegen_1.cpp +// CHECK-NTARGET-OMP-DEFAULT-SAME: () #[[ATTR0]] { +// CHECK-NTARGET-OMP-DEFAULT-NEXT: entry: +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @__cxx_global_var_init.1() +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @__cxx_global_var_init.4() +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @__cxx_global_var_init.5() +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @__cxx_global_var_init.6() +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @__cxx_global_var_init.7() +// CHECK-NTARGET-OMP-DEFAULT-NEXT: call void @__cxx_global_var_init.8() +// CHECK-NTARGET-OMP-DEFAULT-NEXT: ret void +// +//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +// TCHECK: {{.*}} diff --git a/clang/test/OpenMP/target_parallel_generic_loop_codegen-2.cpp b/clang/test/OpenMP/target_parallel_generic_loop_codegen-2.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/target_parallel_generic_loop_codegen-2.cpp @@ -0,0 +1,952 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 2 +// Test host codegen. 
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-X86 +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK-X86 + +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0-X86 %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown 
-fopenmp-targets=i386-pc-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0-X86 %s + +// Test target parallel for codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s -check-prefix=TCHECK-TARGET +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK-TARGET +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -check-prefix=TCHECK-TARGET-X86 +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=TCHECK-TARGET-X86 + +// RUN: %clang_cc1 
-verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1-TARGET %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1-TARGET %s +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1-TARGET-X86 %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1-TARGET-X86 %s + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + + +int nested(int a){ + #pragma omp target parallel loop + for (int i = 0; i < 10; ++i) + ++a; + + auto F = [&](){ + #pragma 
omp parallel + { + #pragma omp target parallel loop + for (int i = 0; i < 10; ++i) + ++a; + } + }; + + F(); + + return a; +} + + + + + + +// Check metadata is properly generated: + +#endif +// CHECK-LABEL: define dso_local noundef signext i32 @_Z6nestedi +// CHECK-SAME: (i32 noundef signext [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 +// CHECK-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK-NEXT: [[F:%.*]] = alloca [[CLASS_ANON:%.*]], align 8 +// CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-NEXT: store i64 [[TMP1]], ptr [[TMP2]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-NEXT: store i64 [[TMP1]], ptr [[TMP3]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK-NEXT: store ptr null, ptr [[TMP4]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK-NEXT: store i32 2, ptr [[TMP7]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8 +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8 +// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK-NEXT: store ptr null, ptr [[TMP13]], align 8 +// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK-NEXT: store i64 0, ptr [[TMP15]], align 8 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK-NEXT: store i64 0, ptr [[TMP16]], align 8 +// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP17]], align 4 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP18]], align 4 +// CHECK-NEXT: 
[[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK-NEXT: store i32 0, ptr [[TMP19]], align 4 +// CHECK-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42.region_id, ptr [[KERNEL_ARGS]]) +// CHECK-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK: omp_offload.failed: +// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42(i64 [[TMP1]]) #[[ATTR3:[0-9]+]] +// CHECK-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK: omp_offload.cont: +// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[F]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[A_ADDR]], ptr [[TMP22]], align 8 +// CHECK-NEXT: call void @"_ZZ6nestediENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[F]]) +// CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NEXT: ret i32 [[TMP23]] +// +// +// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42 +// CHECK-SAME: (i64 noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42.omp_outlined, i64 [[TMP1]]) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42.omp_outlined +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]] +// CHECK: cond.true: +// CHECK-NEXT: br label [[COND_END:%.*]] +// CHECK: cond.false: +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: br label [[COND_END]] +// CHECK: cond.end: +// CHECK-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK: omp.inner.for.cond: +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK: omp.inner.for.body: +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4 +// CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK: omp.body.continue: +// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK: omp.inner.for.inc: +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK: omp.inner.for.end: +// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK: omp.loop.exit: +// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define internal void 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49 +// CHECK-SAME: (i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49.omp_outlined, i64 [[TMP1]]) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49.omp_outlined +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR2]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK: cond.true: +// CHECK-NEXT: br label [[COND_END:%.*]] +// CHECK: cond.false: +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: br label [[COND_END]] +// CHECK: cond.end: +// CHECK-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK: omp.inner.for.cond: +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK: omp.inner.for.body: +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4 +// CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK: omp.body.continue: +// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK: 
omp.inner.for.inc: +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK: omp.inner.for.end: +// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK: omp.loop.exit: +// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define internal void @.omp_offloading.requires_reg +// CHECK-SAME: () #[[ATTR4:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK-NEXT: ret void +// +// +// CHECK-X86-LABEL: define dso_local noundef i32 @_Z6nestedi +// CHECK-X86-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-X86-NEXT: entry: +// CHECK-X86-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-X86-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK-X86-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK-X86-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK-X86-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 +// CHECK-X86-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK-X86-NEXT: [[F:%.*]] = alloca [[CLASS_ANON:%.*]], align 4 +// CHECK-X86-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK-X86-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-X86-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// CHECK-X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK-X86-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-X86-NEXT: store i32 [[TMP1]], ptr [[TMP2]], align 4 +// CHECK-X86-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-X86-NEXT: store i32 [[TMP1]], ptr [[TMP3]], align 4 +// CHECK-X86-NEXT: [[TMP4:%.*]] = 
getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK-X86-NEXT: store ptr null, ptr [[TMP4]], align 4 +// CHECK-X86-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-X86-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-X86-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK-X86-NEXT: store i32 2, ptr [[TMP7]], align 4 +// CHECK-X86-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK-X86-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK-X86-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK-X86-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 +// CHECK-X86-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK-X86-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 +// CHECK-X86-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK-X86-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4 +// CHECK-X86-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK-X86-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 4 +// CHECK-X86-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK-X86-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CHECK-X86-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK-X86-NEXT: store ptr null, ptr [[TMP14]], align 4 +// CHECK-X86-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// 
CHECK-X86-NEXT: store i64 0, ptr [[TMP15]], align 8 +// CHECK-X86-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK-X86-NEXT: store i64 0, ptr [[TMP16]], align 8 +// CHECK-X86-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK-X86-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP17]], align 4 +// CHECK-X86-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK-X86-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP18]], align 4 +// CHECK-X86-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK-X86-NEXT: store i32 0, ptr [[TMP19]], align 4 +// CHECK-X86-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42.region_id, ptr [[KERNEL_ARGS]]) +// CHECK-X86-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK-X86-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK-X86: omp_offload.failed: +// CHECK-X86-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42(i32 [[TMP1]]) #[[ATTR3:[0-9]+]] +// CHECK-X86-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK-X86: omp_offload.cont: +// CHECK-X86-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[F]], i32 0, i32 0 +// CHECK-X86-NEXT: store ptr [[A_ADDR]], ptr [[TMP22]], align 4 +// CHECK-X86-NEXT: call void @"_ZZ6nestediENK3$_0clEv"(ptr noundef nonnull align 4 dereferenceable(4) [[F]]) +// CHECK-X86-NEXT: [[TMP23:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-X86-NEXT: ret i32 [[TMP23]] +// +// +// CHECK-X86-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42 +// CHECK-X86-SAME: (i32 noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// 
CHECK-X86-NEXT: entry: +// CHECK-X86-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-X86-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK-X86-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK-X86-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-X86-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// CHECK-X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK-X86-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42.omp_outlined, i32 [[TMP1]]) +// CHECK-X86-NEXT: ret void +// +// +// CHECK-X86-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42.omp_outlined +// CHECK-X86-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK-X86-NEXT: entry: +// CHECK-X86-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-X86-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-X86-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-X86-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-X86-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-X86-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-X86-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-X86-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-X86-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-X86-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-X86-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-X86-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-X86-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK-X86-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-X86-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-X86-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-X86-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], 
align 4 +// CHECK-X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-X86-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-X86-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-X86: cond.true: +// CHECK-X86-NEXT: br label [[COND_END:%.*]] +// CHECK-X86: cond.false: +// CHECK-X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-X86-NEXT: br label [[COND_END]] +// CHECK-X86: cond.end: +// CHECK-X86-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-X86-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-X86-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-X86-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-X86: omp.inner.for.cond: +// CHECK-X86-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-X86-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-X86-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-X86: omp.inner.for.body: +// CHECK-X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-X86-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-X86-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-X86-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-X86-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-X86-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK-X86-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4 +// 
CHECK-X86-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-X86: omp.body.continue: +// CHECK-X86-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-X86: omp.inner.for.inc: +// CHECK-X86-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-X86-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK-X86-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 +// CHECK-X86-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-X86: omp.inner.for.end: +// CHECK-X86-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-X86: omp.loop.exit: +// CHECK-X86-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-X86-NEXT: ret void +// +// +// CHECK-X86-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49 +// CHECK-X86-SAME: (i32 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-X86-NEXT: entry: +// CHECK-X86-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-X86-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK-X86-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK-X86-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-X86-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// CHECK-X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK-X86-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49.omp_outlined, i32 [[TMP1]]) +// CHECK-X86-NEXT: ret void +// +// +// CHECK-X86-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49.omp_outlined +// CHECK-X86-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR2]] { +// CHECK-X86-NEXT: entry: +// CHECK-X86-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-X86-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-X86-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-X86-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-X86-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-X86-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-X86-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-X86-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-X86-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-X86-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-X86-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-X86-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-X86-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK-X86-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-X86-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-X86-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-X86-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-X86-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-X86-NEXT: [[CMP:%.*]] = icmp sgt 
i32 [[TMP2]], 9 +// CHECK-X86-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-X86: cond.true: +// CHECK-X86-NEXT: br label [[COND_END:%.*]] +// CHECK-X86: cond.false: +// CHECK-X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-X86-NEXT: br label [[COND_END]] +// CHECK-X86: cond.end: +// CHECK-X86-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-X86-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-X86-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-X86-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-X86: omp.inner.for.cond: +// CHECK-X86-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-X86-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-X86-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-X86: omp.inner.for.body: +// CHECK-X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-X86-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-X86-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-X86-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-X86-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-X86-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK-X86-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4 +// CHECK-X86-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-X86: omp.body.continue: +// CHECK-X86-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-X86: omp.inner.for.inc: +// CHECK-X86-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-X86-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK-X86-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 +// CHECK-X86-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-X86: omp.inner.for.end: +// CHECK-X86-NEXT: br 
label [[OMP_LOOP_EXIT:%.*]] +// CHECK-X86: omp.loop.exit: +// CHECK-X86-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-X86-NEXT: ret void +// +// +// CHECK-X86-LABEL: define internal void @.omp_offloading.requires_reg +// CHECK-X86-SAME: () #[[ATTR4:[0-9]+]] { +// CHECK-X86-NEXT: entry: +// CHECK-X86-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK-X86-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define dso_local noundef signext i32 @_Z6nestedi +// SIMD-ONLY0-SAME: (i32 noundef signext [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[F:%.*]] = alloca [[CLASS_ANON:%.*]], align 8 +// SIMD-ONLY0-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// SIMD-ONLY0-NEXT: store i32 0, ptr [[I]], align 4 +// SIMD-ONLY0-NEXT: br label [[FOR_COND:%.*]] +// SIMD-ONLY0: for.cond: +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4 +// SIMD-ONLY0-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 10 +// SIMD-ONLY0-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// SIMD-ONLY0: for.body: +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// SIMD-ONLY0-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 +// SIMD-ONLY0-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4 +// SIMD-ONLY0-NEXT: br label [[FOR_INC:%.*]] +// SIMD-ONLY0: for.inc: +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 +// SIMD-ONLY0-NEXT: [[INC1:%.*]] = add nsw i32 [[TMP2]], 1 +// SIMD-ONLY0-NEXT: store i32 [[INC1]], ptr [[I]], align 4 +// SIMD-ONLY0-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]] +// SIMD-ONLY0: for.end: +// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[F]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: store ptr [[A_ADDR]], ptr [[TMP3]], align 8 +// SIMD-ONLY0-NEXT: call void @"_ZZ6nestediENK3$_0clEv"(ptr noundef nonnull align 8 
dereferenceable(8) [[F]]) +// SIMD-ONLY0-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// SIMD-ONLY0-NEXT: ret i32 [[TMP4]] +// +// +// SIMD-ONLY0-X86-LABEL: define dso_local noundef i32 @_Z6nestedi +// SIMD-ONLY0-X86-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// SIMD-ONLY0-X86-NEXT: entry: +// SIMD-ONLY0-X86-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-X86-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-X86-NEXT: [[F:%.*]] = alloca [[CLASS_ANON:%.*]], align 4 +// SIMD-ONLY0-X86-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// SIMD-ONLY0-X86-NEXT: store i32 0, ptr [[I]], align 4 +// SIMD-ONLY0-X86-NEXT: br label [[FOR_COND:%.*]] +// SIMD-ONLY0-X86: for.cond: +// SIMD-ONLY0-X86-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4 +// SIMD-ONLY0-X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 10 +// SIMD-ONLY0-X86-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// SIMD-ONLY0-X86: for.body: +// SIMD-ONLY0-X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// SIMD-ONLY0-X86-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 +// SIMD-ONLY0-X86-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4 +// SIMD-ONLY0-X86-NEXT: br label [[FOR_INC:%.*]] +// SIMD-ONLY0-X86: for.inc: +// SIMD-ONLY0-X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 +// SIMD-ONLY0-X86-NEXT: [[INC1:%.*]] = add nsw i32 [[TMP2]], 1 +// SIMD-ONLY0-X86-NEXT: store i32 [[INC1]], ptr [[I]], align 4 +// SIMD-ONLY0-X86-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// SIMD-ONLY0-X86: for.end: +// SIMD-ONLY0-X86-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[F]], i32 0, i32 0 +// SIMD-ONLY0-X86-NEXT: store ptr [[A_ADDR]], ptr [[TMP3]], align 4 +// SIMD-ONLY0-X86-NEXT: call void @"_ZZ6nestediENK3$_0clEv"(ptr noundef nonnull align 4 dereferenceable(4) [[F]]) +// SIMD-ONLY0-X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// SIMD-ONLY0-X86-NEXT: ret i32 [[TMP4]] +// +// +// TCHECK-TARGET-LABEL: define weak_odr 
protected void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42 +// TCHECK-TARGET-SAME: (i64 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// TCHECK-TARGET-NEXT: entry: +// TCHECK-TARGET-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// TCHECK-TARGET-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// TCHECK-TARGET-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// TCHECK-TARGET-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// TCHECK-TARGET-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// TCHECK-TARGET-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// TCHECK-TARGET-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42.omp_outlined, i64 [[TMP1]]) +// TCHECK-TARGET-NEXT: ret void +// +// +// TCHECK-TARGET-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42.omp_outlined +// TCHECK-TARGET-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// TCHECK-TARGET-NEXT: entry: +// TCHECK-TARGET-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// TCHECK-TARGET-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// TCHECK-TARGET-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// TCHECK-TARGET-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// TCHECK-TARGET-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// TCHECK-TARGET-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// TCHECK-TARGET-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// TCHECK-TARGET-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// TCHECK-TARGET-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// TCHECK-TARGET-NEXT: [[I:%.*]] = alloca i32, align 4 +// TCHECK-TARGET-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// TCHECK-TARGET-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// TCHECK-TARGET-NEXT: store i64 [[A]], ptr 
[[A_ADDR]], align 8 +// TCHECK-TARGET-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// TCHECK-TARGET-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// TCHECK-TARGET-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// TCHECK-TARGET-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// TCHECK-TARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// TCHECK-TARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// TCHECK-TARGET-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// TCHECK-TARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// TCHECK-TARGET-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// TCHECK-TARGET-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// TCHECK-TARGET: cond.true: +// TCHECK-TARGET-NEXT: br label [[COND_END:%.*]] +// TCHECK-TARGET: cond.false: +// TCHECK-TARGET-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// TCHECK-TARGET-NEXT: br label [[COND_END]] +// TCHECK-TARGET: cond.end: +// TCHECK-TARGET-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// TCHECK-TARGET-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// TCHECK-TARGET-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// TCHECK-TARGET-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// TCHECK-TARGET-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// TCHECK-TARGET: omp.inner.for.cond: +// TCHECK-TARGET-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// TCHECK-TARGET-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// TCHECK-TARGET-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// TCHECK-TARGET-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// TCHECK-TARGET: omp.inner.for.body: +// TCHECK-TARGET-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// 
TCHECK-TARGET-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// TCHECK-TARGET-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// TCHECK-TARGET-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// TCHECK-TARGET-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// TCHECK-TARGET-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 +// TCHECK-TARGET-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4 +// TCHECK-TARGET-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// TCHECK-TARGET: omp.body.continue: +// TCHECK-TARGET-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// TCHECK-TARGET: omp.inner.for.inc: +// TCHECK-TARGET-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// TCHECK-TARGET-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 +// TCHECK-TARGET-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 +// TCHECK-TARGET-NEXT: br label [[OMP_INNER_FOR_COND]] +// TCHECK-TARGET: omp.inner.for.end: +// TCHECK-TARGET-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// TCHECK-TARGET: omp.loop.exit: +// TCHECK-TARGET-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// TCHECK-TARGET-NEXT: ret void +// +// +// TCHECK-TARGET-LABEL: define weak_odr protected void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49 +// TCHECK-TARGET-SAME: (i64 noundef [[A:%.*]]) #[[ATTR0]] { +// TCHECK-TARGET-NEXT: entry: +// TCHECK-TARGET-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// TCHECK-TARGET-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// TCHECK-TARGET-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// TCHECK-TARGET-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// TCHECK-TARGET-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// TCHECK-TARGET-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// TCHECK-TARGET-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49.omp_outlined, i64 [[TMP1]]) +// TCHECK-TARGET-NEXT: ret void +// +// +// TCHECK-TARGET-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49.omp_outlined +// TCHECK-TARGET-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] { +// TCHECK-TARGET-NEXT: entry: +// TCHECK-TARGET-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// TCHECK-TARGET-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// TCHECK-TARGET-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// TCHECK-TARGET-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// TCHECK-TARGET-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// TCHECK-TARGET-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// TCHECK-TARGET-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// TCHECK-TARGET-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// TCHECK-TARGET-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// TCHECK-TARGET-NEXT: [[I:%.*]] = alloca i32, align 4 +// TCHECK-TARGET-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// TCHECK-TARGET-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// TCHECK-TARGET-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// TCHECK-TARGET-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// TCHECK-TARGET-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// TCHECK-TARGET-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// TCHECK-TARGET-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// TCHECK-TARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// TCHECK-TARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// TCHECK-TARGET-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// 
TCHECK-TARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// TCHECK-TARGET-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// TCHECK-TARGET-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// TCHECK-TARGET: cond.true: +// TCHECK-TARGET-NEXT: br label [[COND_END:%.*]] +// TCHECK-TARGET: cond.false: +// TCHECK-TARGET-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// TCHECK-TARGET-NEXT: br label [[COND_END]] +// TCHECK-TARGET: cond.end: +// TCHECK-TARGET-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// TCHECK-TARGET-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// TCHECK-TARGET-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// TCHECK-TARGET-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// TCHECK-TARGET-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// TCHECK-TARGET: omp.inner.for.cond: +// TCHECK-TARGET-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// TCHECK-TARGET-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// TCHECK-TARGET-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// TCHECK-TARGET-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// TCHECK-TARGET: omp.inner.for.body: +// TCHECK-TARGET-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// TCHECK-TARGET-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// TCHECK-TARGET-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// TCHECK-TARGET-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// TCHECK-TARGET-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// TCHECK-TARGET-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 +// TCHECK-TARGET-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4 +// TCHECK-TARGET-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// TCHECK-TARGET: omp.body.continue: +// TCHECK-TARGET-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// TCHECK-TARGET: omp.inner.for.inc: +// TCHECK-TARGET-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 
4 +// TCHECK-TARGET-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 +// TCHECK-TARGET-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 +// TCHECK-TARGET-NEXT: br label [[OMP_INNER_FOR_COND]] +// TCHECK-TARGET: omp.inner.for.end: +// TCHECK-TARGET-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// TCHECK-TARGET: omp.loop.exit: +// TCHECK-TARGET-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// TCHECK-TARGET-NEXT: ret void +// +// +// TCHECK-TARGET-X86-LABEL: define weak_odr protected void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42 +// TCHECK-TARGET-X86-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// TCHECK-TARGET-X86-NEXT: entry: +// TCHECK-TARGET-X86-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// TCHECK-TARGET-X86-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// TCHECK-TARGET-X86-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// TCHECK-TARGET-X86-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// TCHECK-TARGET-X86-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// TCHECK-TARGET-X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// TCHECK-TARGET-X86-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42.omp_outlined, i32 [[TMP1]]) +// TCHECK-TARGET-X86-NEXT: ret void +// +// +// TCHECK-TARGET-X86-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42.omp_outlined +// TCHECK-TARGET-X86-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// TCHECK-TARGET-X86-NEXT: entry: +// TCHECK-TARGET-X86-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// TCHECK-TARGET-X86-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// TCHECK-TARGET-X86-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// TCHECK-TARGET-X86-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// TCHECK-TARGET-X86-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// TCHECK-TARGET-X86-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// TCHECK-TARGET-X86-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// TCHECK-TARGET-X86-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// TCHECK-TARGET-X86-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// TCHECK-TARGET-X86-NEXT: [[I:%.*]] = alloca i32, align 4 +// TCHECK-TARGET-X86-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// TCHECK-TARGET-X86-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// TCHECK-TARGET-X86-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// TCHECK-TARGET-X86-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// TCHECK-TARGET-X86-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// TCHECK-TARGET-X86-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// TCHECK-TARGET-X86-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// TCHECK-TARGET-X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// TCHECK-TARGET-X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// TCHECK-TARGET-X86-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, 
ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// TCHECK-TARGET-X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// TCHECK-TARGET-X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// TCHECK-TARGET-X86-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// TCHECK-TARGET-X86: cond.true: +// TCHECK-TARGET-X86-NEXT: br label [[COND_END:%.*]] +// TCHECK-TARGET-X86: cond.false: +// TCHECK-TARGET-X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// TCHECK-TARGET-X86-NEXT: br label [[COND_END]] +// TCHECK-TARGET-X86: cond.end: +// TCHECK-TARGET-X86-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// TCHECK-TARGET-X86-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// TCHECK-TARGET-X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// TCHECK-TARGET-X86-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// TCHECK-TARGET-X86-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// TCHECK-TARGET-X86: omp.inner.for.cond: +// TCHECK-TARGET-X86-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// TCHECK-TARGET-X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// TCHECK-TARGET-X86-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// TCHECK-TARGET-X86-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// TCHECK-TARGET-X86: omp.inner.for.body: +// TCHECK-TARGET-X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// TCHECK-TARGET-X86-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// TCHECK-TARGET-X86-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// TCHECK-TARGET-X86-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// TCHECK-TARGET-X86-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// TCHECK-TARGET-X86-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 +// TCHECK-TARGET-X86-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4 +// TCHECK-TARGET-X86-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] +// TCHECK-TARGET-X86: omp.body.continue: +// TCHECK-TARGET-X86-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// TCHECK-TARGET-X86: omp.inner.for.inc: +// TCHECK-TARGET-X86-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// TCHECK-TARGET-X86-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 +// TCHECK-TARGET-X86-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 +// TCHECK-TARGET-X86-NEXT: br label [[OMP_INNER_FOR_COND]] +// TCHECK-TARGET-X86: omp.inner.for.end: +// TCHECK-TARGET-X86-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// TCHECK-TARGET-X86: omp.loop.exit: +// TCHECK-TARGET-X86-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// TCHECK-TARGET-X86-NEXT: ret void +// +// +// TCHECK-TARGET-X86-LABEL: define weak_odr protected void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49 +// TCHECK-TARGET-X86-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0]] { +// TCHECK-TARGET-X86-NEXT: entry: +// TCHECK-TARGET-X86-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// TCHECK-TARGET-X86-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// TCHECK-TARGET-X86-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// TCHECK-TARGET-X86-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// TCHECK-TARGET-X86-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 +// TCHECK-TARGET-X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// TCHECK-TARGET-X86-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49.omp_outlined, i32 [[TMP1]]) +// TCHECK-TARGET-X86-NEXT: ret void +// +// +// TCHECK-TARGET-X86-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49.omp_outlined +// TCHECK-TARGET-X86-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1]] { +// TCHECK-TARGET-X86-NEXT: entry: +// TCHECK-TARGET-X86-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// TCHECK-TARGET-X86-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// TCHECK-TARGET-X86-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// TCHECK-TARGET-X86-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// TCHECK-TARGET-X86-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// TCHECK-TARGET-X86-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// TCHECK-TARGET-X86-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// TCHECK-TARGET-X86-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// TCHECK-TARGET-X86-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// TCHECK-TARGET-X86-NEXT: [[I:%.*]] = alloca i32, align 4 +// TCHECK-TARGET-X86-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// TCHECK-TARGET-X86-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// TCHECK-TARGET-X86-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// TCHECK-TARGET-X86-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// TCHECK-TARGET-X86-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// TCHECK-TARGET-X86-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// TCHECK-TARGET-X86-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// TCHECK-TARGET-X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// TCHECK-TARGET-X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// TCHECK-TARGET-X86-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// TCHECK-TARGET-X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// TCHECK-TARGET-X86-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// TCHECK-TARGET-X86-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// TCHECK-TARGET-X86: cond.true: +// TCHECK-TARGET-X86-NEXT: br label [[COND_END:%.*]] +// TCHECK-TARGET-X86: cond.false: +// TCHECK-TARGET-X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// TCHECK-TARGET-X86-NEXT: br label [[COND_END]] +// TCHECK-TARGET-X86: cond.end: +// TCHECK-TARGET-X86-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// TCHECK-TARGET-X86-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// TCHECK-TARGET-X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// TCHECK-TARGET-X86-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// TCHECK-TARGET-X86-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// TCHECK-TARGET-X86: omp.inner.for.cond: +// TCHECK-TARGET-X86-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// TCHECK-TARGET-X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// TCHECK-TARGET-X86-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// TCHECK-TARGET-X86-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// TCHECK-TARGET-X86: omp.inner.for.body: +// TCHECK-TARGET-X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// TCHECK-TARGET-X86-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// TCHECK-TARGET-X86-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// TCHECK-TARGET-X86-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// TCHECK-TARGET-X86-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// TCHECK-TARGET-X86-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 +// TCHECK-TARGET-X86-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4 +// TCHECK-TARGET-X86-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] +// TCHECK-TARGET-X86: omp.body.continue: +// TCHECK-TARGET-X86-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// TCHECK-TARGET-X86: omp.inner.for.inc: +// TCHECK-TARGET-X86-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// TCHECK-TARGET-X86-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 +// TCHECK-TARGET-X86-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 +// TCHECK-TARGET-X86-NEXT: br label [[OMP_INNER_FOR_COND]] +// TCHECK-TARGET-X86: omp.inner.for.end: +// TCHECK-TARGET-X86-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// TCHECK-TARGET-X86: omp.loop.exit: +// TCHECK-TARGET-X86-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// TCHECK-TARGET-X86-NEXT: ret void +// +// +// SIMD-ONLY1-TARGET-LABEL: define dso_local noundef signext i32 @_Z6nestedi +// SIMD-ONLY1-TARGET-SAME: (i32 noundef signext [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// SIMD-ONLY1-TARGET-NEXT: entry: +// SIMD-ONLY1-TARGET-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-TARGET-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-TARGET-NEXT: [[F:%.*]] = alloca [[CLASS_ANON:%.*]], align 8 +// SIMD-ONLY1-TARGET-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// SIMD-ONLY1-TARGET-NEXT: store i32 0, ptr [[I]], align 4 +// SIMD-ONLY1-TARGET-NEXT: br label [[FOR_COND:%.*]] +// SIMD-ONLY1-TARGET: for.cond: +// SIMD-ONLY1-TARGET-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4 +// SIMD-ONLY1-TARGET-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 10 +// SIMD-ONLY1-TARGET-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// SIMD-ONLY1-TARGET: for.body: +// SIMD-ONLY1-TARGET-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// SIMD-ONLY1-TARGET-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 +// SIMD-ONLY1-TARGET-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4 +// SIMD-ONLY1-TARGET-NEXT: br label [[FOR_INC:%.*]] +// SIMD-ONLY1-TARGET: for.inc: +// SIMD-ONLY1-TARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 +// 
SIMD-ONLY1-TARGET-NEXT: [[INC1:%.*]] = add nsw i32 [[TMP2]], 1 +// SIMD-ONLY1-TARGET-NEXT: store i32 [[INC1]], ptr [[I]], align 4 +// SIMD-ONLY1-TARGET-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]] +// SIMD-ONLY1-TARGET: for.end: +// SIMD-ONLY1-TARGET-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[F]], i32 0, i32 0 +// SIMD-ONLY1-TARGET-NEXT: store ptr [[A_ADDR]], ptr [[TMP3]], align 8 +// SIMD-ONLY1-TARGET-NEXT: call void @"_ZZ6nestediENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[F]]) +// SIMD-ONLY1-TARGET-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// SIMD-ONLY1-TARGET-NEXT: ret i32 [[TMP4]] +// +// +// SIMD-ONLY1-TARGET-X86-LABEL: define dso_local noundef i32 @_Z6nestedi +// SIMD-ONLY1-TARGET-X86-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// SIMD-ONLY1-TARGET-X86-NEXT: entry: +// SIMD-ONLY1-TARGET-X86-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-TARGET-X86-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-TARGET-X86-NEXT: [[F:%.*]] = alloca [[CLASS_ANON:%.*]], align 4 +// SIMD-ONLY1-TARGET-X86-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// SIMD-ONLY1-TARGET-X86-NEXT: store i32 0, ptr [[I]], align 4 +// SIMD-ONLY1-TARGET-X86-NEXT: br label [[FOR_COND:%.*]] +// SIMD-ONLY1-TARGET-X86: for.cond: +// SIMD-ONLY1-TARGET-X86-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4 +// SIMD-ONLY1-TARGET-X86-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 10 +// SIMD-ONLY1-TARGET-X86-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// SIMD-ONLY1-TARGET-X86: for.body: +// SIMD-ONLY1-TARGET-X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// SIMD-ONLY1-TARGET-X86-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 +// SIMD-ONLY1-TARGET-X86-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4 +// SIMD-ONLY1-TARGET-X86-NEXT: br label [[FOR_INC:%.*]] +// SIMD-ONLY1-TARGET-X86: for.inc: +// SIMD-ONLY1-TARGET-X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 +// 
SIMD-ONLY1-TARGET-X86-NEXT: [[INC1:%.*]] = add nsw i32 [[TMP2]], 1 +// SIMD-ONLY1-TARGET-X86-NEXT: store i32 [[INC1]], ptr [[I]], align 4 +// SIMD-ONLY1-TARGET-X86-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// SIMD-ONLY1-TARGET-X86: for.end: +// SIMD-ONLY1-TARGET-X86-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[F]], i32 0, i32 0 +// SIMD-ONLY1-TARGET-X86-NEXT: store ptr [[A_ADDR]], ptr [[TMP3]], align 4 +// SIMD-ONLY1-TARGET-X86-NEXT: call void @"_ZZ6nestediENK3$_0clEv"(ptr noundef nonnull align 4 dereferenceable(4) [[F]]) +// SIMD-ONLY1-TARGET-X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// SIMD-ONLY1-TARGET-X86-NEXT: ret i32 [[TMP4]] +// diff --git a/clang/test/OpenMP/target_parallel_generic_loop_codegen-3.cpp b/clang/test/OpenMP/target_parallel_generic_loop_codegen-3.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/target_parallel_generic_loop_codegen-3.cpp @@ -0,0 +1,921 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited | FileCheck %s --check-prefix=CHECK1 +// expected-no-diagnostics + +int main() { + /* int(*b)[a]; */ + /* int *(**c)[a]; */ + bool bb; + int a; + int b[10][10]; + int c[10][10][10]; +#pragma omp target parallel loop firstprivate(a, b) map(tofrom \ + : c) map(tofrom \ + : bb) if (a) + for (int i = 0; i < 10; ++i) { + int &f = c[1][1][1]; + int &g = 
a; + int &h = b[1][1]; + int d = 15; + a = 5; + b[0][a] = 10; + c[0][0][a] = 11; + b[0][a] = c[0][0][a]; + bb |= b[0][a]; + } +#pragma omp target parallel loop firstprivate(a) map(tofrom \ + : c, b) map(to \ + : bb) + for (int i = 0; i < 10; ++i) { + int &f = c[1][1][1]; + int &g = a; + int &h = b[1][1]; + int d = 15; + a = 5; + b[0][a] = 10; + c[0][0][a] = 11; + b[0][a] = c[0][0][a]; + d = bb; + } +#pragma omp target parallel loop map(tofrom \ + : a, c, b) map(from \ + : bb) + for (int i = 0; i < 10; ++i) { + int &f = c[1][1][1]; + int &g = a; + int &h = b[1][1]; + int d = 15; + a = 5; + b[0][a] = 10; + c[0][0][a] = 11; + b[0][a] = c[0][0][a]; + bb = b[0][a]; + } + return 0; +} +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug__ +// CHECK1-SAME: (ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]], i1 noundef zeroext [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG13:![0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META31:![0-9]+]], metadata !DIExpression()), !dbg [[DBG32:![0-9]+]] +// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], 
metadata [[META33:![0-9]+]], metadata !DIExpression()), !dbg [[DBG34:![0-9]+]] +// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META35:![0-9]+]], metadata !DIExpression()), !dbg [[DBG36:![0-9]+]] +// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META37:![0-9]+]], metadata !DIExpression()), !dbg [[DBG38:![0-9]+]] +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[DOTCAPTURE_EXPR_]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTCAPTURE_EXPR__ADDR]], metadata [[META39:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40:![0-9]+]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG41:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG41]] +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG41]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG41]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG41]] +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8, !dbg [[DBG41]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG41]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG41]] +// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG41]] +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG41]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG41]] +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false), !dbg [[DBG41]] +// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1, !dbg [[DBG41]] +// CHECK1-NEXT: br i1 
[[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG41]] +// CHECK1: user_code.entry: +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB6:[0-9]+]]) +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG42:![0-9]+]] +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4, !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG42]] +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8, !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP11]] to ptr, !dbg [[DBG42]] +// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG42]] +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP15]], align 8, !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG42]] +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP16]], align 8, !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !dbg [[DBG43:![0-9]+]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP17]] to i1, !dbg [[DBG43]] +// CHECK1-NEXT: [[TMP18:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG42]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB6]], i32 [[TMP9]], i32 [[TMP18]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG42]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB8:[0-9]+]], i8 2), !dbg [[DBG45:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg 
[[DBG46:![0-9]+]] +// CHECK1: worker.exit: +// CHECK1-NEXT: ret void, !dbg [[DBG41]] +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined_debug__ +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG47:![0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8 +// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B4:%.*]] = alloca [10 x [10 x i32]], align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[G:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META54:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55:![0-9]+]] +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META56:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] +// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META57:![0-9]+]], metadata !DIExpression()), !dbg [[DBG58:![0-9]+]] +// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META59:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60:![0-9]+]] +// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META61:![0-9]+]], metadata !DIExpression()), !dbg [[DBG62:![0-9]+]] +// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META63:![0-9]+]], metadata !DIExpression()), !dbg [[DBG64:![0-9]+]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG65:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG65]] +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG65]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG65]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG65]] +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8, !dbg [[DBG65]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG65]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG65]] +// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG65]] +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG65]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG65]] +// CHECK1-NEXT: 
call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IV]], metadata [[META66:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_LB]], metadata [[META67:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG68:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_UB]], metadata [[META69:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] +// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_STRIDE]], metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG68]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IS_LAST]], metadata [[META71:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG68]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B4]], metadata [[META72:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B4]], ptr align 4 [[TMP4]], i64 400, i1 false), !dbg [[DBG65]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META73:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG65]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !dbg [[DBG65]] +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG74:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG65]] +// CHECK1: omp.dispatch.cond: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg 
[[DBG68]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9, !dbg [[DBG68]] +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG68]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG68]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]] +// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG68]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ], !dbg [[DBG68]] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG68]] +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]], !dbg [[DBG65]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG65]] +// CHECK1: omp.dispatch.body: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG65]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]] +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]], !dbg [[DBG65]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG65]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1, !dbg [[DBG75:![0-9]+]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG75]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, 
!dbg [[DBG75]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META76:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG80:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG80]] +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX7]], i64 0, i64 1, !dbg [[DBG80]] +// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[F]], align 8, !dbg [[DBG79]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META81:![0-9]+]], metadata !DIExpression()), !dbg [[DBG82:![0-9]+]] +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[G]], align 8, !dbg [[DBG82]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META83:![0-9]+]], metadata !DIExpression()), !dbg [[DBG84:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 1, !dbg [[DBG85:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX9]], i64 0, i64 1, !dbg [[DBG85]] +// CHECK1-NEXT: store ptr [[ARRAYIDX10]], ptr [[H]], align 8, !dbg [[DBG84]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META86:![0-9]+]], metadata !DIExpression()), !dbg [[DBG87:![0-9]+]] +// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG87]] +// CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG88:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG89:![0-9]+]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG90:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64, !dbg [[DBG89]] +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX11]], i64 0, i64 
[[IDXPROM]], !dbg [[DBG89]] +// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX12]], align 4, !dbg [[DBG91:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG92:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG92]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG93:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG92]] +// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG92]] +// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX16]], align 4, !dbg [[DBG94:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG95:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX17]], i64 0, i64 0, !dbg [[DBG95]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG96:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG95]] +// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG95]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4, !dbg [[DBG95]] +// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG97:![0-9]+]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG98:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG97]] +// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG97]] +// CHECK1-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX23]], align 4, !dbg [[DBG99:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX24:%.*]] = getelementptr 
inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG100:![0-9]+]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG101:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM25:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG100]] +// CHECK1-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX24]], i64 0, i64 [[IDXPROM25]], !dbg [[DBG100]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX26]], align 4, !dbg [[DBG100]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i8, ptr [[TMP7]], align 1, !dbg [[DBG102:![0-9]+]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP25]] to i1, !dbg [[DBG102]] +// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG102]] +// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP24]], !dbg [[DBG102]] +// CHECK1-NEXT: [[TOBOOL27:%.*]] = icmp ne i32 [[OR]], 0, !dbg [[DBG102]] +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL27]] to i8, !dbg [[DBG102]] +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP7]], align 1, !dbg [[DBG102]] +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG103:![0-9]+]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG74]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]] +// CHECK1-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP26]], 1, !dbg [[DBG65]] +// CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG65]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG74]], !llvm.loop [[LOOP104:![0-9]+]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG74]] +// CHECK1: omp.dispatch.inc: +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG68]] +// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP27]], [[TMP28]], !dbg [[DBG65]] +// CHECK1-NEXT: store i32 [[ADD29]], ptr 
[[DOTOMP_LB]], align 4, !dbg [[DBG65]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG68]] +// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[TMP29]], [[TMP30]], !dbg [[DBG65]] +// CHECK1-NEXT: store i32 [[ADD30]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG65]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG74]], !llvm.loop [[LOOP106:![0-9]+]] +// CHECK1: omp.dispatch.end: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB5:[0-9]+]], i32 [[TMP9]]), !dbg [[DBG105:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG107:![0-9]+]] +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG108:![0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META115:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116:![0-9]+]] +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META117:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116]] +// 
CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META118:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116]] +// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META119:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116]] +// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META120:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116]] +// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META121:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG122:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG122]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG122]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG122]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG122]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG122]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG122]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG122]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG122]] +// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG122]] +// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG122]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined_debug__(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr [[TMP7]], ptr addrspace(1) [[TMP10]]) 
#[[ATTR3:[0-9]+]], !dbg [[DBG122]] +// CHECK1-NEXT: ret void, !dbg [[DBG122]] +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13 +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR5:[0-9]+]] !dbg [[DBG123:![0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META126:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127:![0-9]+]] +// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META128:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127]] +// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META129:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127]] +// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META130:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127]] +// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTCAPTURE_EXPR__ADDR]], metadata [[META131:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG132:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 
8, !dbg [[DBG132]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG132]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG132]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG132]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG132]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG132]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !dbg [[DBG132]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1, !dbg [[DBG132]] +// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP3]] to ptr addrspace(1), !dbg [[DBG132]] +// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG132]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug__(ptr addrspace(1) [[TMP8]], i32 [[TMP4]], ptr [[TMP5]], ptr addrspace(1) [[TMP9]], i1 [[TOBOOL]]) #[[ATTR3]], !dbg [[DBG132]] +// CHECK1-NEXT: ret void, !dbg [[DBG132]] +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug__ +// CHECK1-SAME: (ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG133:![0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8 +// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8 +// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], 
align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META138:![0-9]+]], metadata !DIExpression()), !dbg [[DBG139:![0-9]+]] +// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META140:![0-9]+]], metadata !DIExpression()), !dbg [[DBG141:![0-9]+]] +// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META142:![0-9]+]], metadata !DIExpression()), !dbg [[DBG143:![0-9]+]] +// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG145:![0-9]+]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG146:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG146]] +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG146]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG146]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG146]] +// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG146]] +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG146]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG146]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG146]] +// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG146]] +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG146]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG146]] +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB10:[0-9]+]], i8 2, i1 false), !dbg [[DBG146]] +// 
CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG146]] +// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG146]] +// CHECK1: user_code.entry: +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15:[0-9]+]]) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG147:![0-9]+]] +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[A_CASTED]], align 4, !dbg [[DBG147]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG147]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG147]] +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP13]], align 8, !dbg [[DBG147]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG147]] +// CHECK1-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP12]] to ptr, !dbg [[DBG147]] +// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8, !dbg [[DBG147]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG147]] +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP16]], align 8, !dbg [[DBG147]] +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG147]] +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP17]], align 8, !dbg [[DBG147]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB15]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG147]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB17:[0-9]+]], i8 2), !dbg [[DBG148:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG150:![0-9]+]] +// CHECK1: worker.exit: +// CHECK1-NEXT: ret void, !dbg [[DBG146]] +// +// +// CHECK1-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined_debug__ +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG151:![0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8 +// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8 +// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[G:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META154:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155:![0-9]+]] +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META156:![0-9]+]], metadata 
!DIExpression()), !dbg [[DBG155]] +// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META157:![0-9]+]], metadata !DIExpression()), !dbg [[DBG158:![0-9]+]] +// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META159:![0-9]+]], metadata !DIExpression()), !dbg [[DBG160:![0-9]+]] +// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META161:![0-9]+]], metadata !DIExpression()), !dbg [[DBG162:![0-9]+]] +// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META163:![0-9]+]], metadata !DIExpression()), !dbg [[DBG164:![0-9]+]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG165:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG165]] +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG165]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG165]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG165]] +// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG165]] +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG165]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG165]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG165]] +// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG165]] +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG165]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG165]] +// CHECK1-NEXT: call void 
@llvm.dbg.declare(metadata ptr [[DOTOMP_IV]], metadata [[META166:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_LB]], metadata [[META167:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]] +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG168:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_UB]], metadata [[META169:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]] +// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_STRIDE]], metadata [[META170:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]] +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG168]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IS_LAST]], metadata [[META171:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]] +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG168]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META172:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG165]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG165]] +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB12:[0-9]+]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG173:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG165]] +// CHECK1: omp.dispatch.cond: +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9, !dbg [[DBG168]] +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG168]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg 
[[DBG168]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]] +// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG168]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ], !dbg [[DBG168]] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG168]] +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG168]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG168]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]], !dbg [[DBG165]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG165]] +// CHECK1: omp.dispatch.body: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG165]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG168]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]], !dbg [[DBG165]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG165]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG168]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1, !dbg [[DBG174:![0-9]+]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG174]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG174]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META175:![0-9]+]], metadata !DIExpression()), !dbg [[DBG177:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 
x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG178:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG178]] +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX6]], i64 0, i64 1, !dbg [[DBG178]] +// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[F]], align 8, !dbg [[DBG177]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META179:![0-9]+]], metadata !DIExpression()), !dbg [[DBG180:![0-9]+]] +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[G]], align 8, !dbg [[DBG180]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META181:![0-9]+]], metadata !DIExpression()), !dbg [[DBG182:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 1, !dbg [[DBG183:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX8]], i64 0, i64 1, !dbg [[DBG183]] +// CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[H]], align 8, !dbg [[DBG182]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META184:![0-9]+]], metadata !DIExpression()), !dbg [[DBG185:![0-9]+]] +// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG185]] +// CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG186:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG187:![0-9]+]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG188:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG187]] +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX10]], i64 0, i64 [[IDXPROM]], !dbg [[DBG187]] +// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX11]], align 4, !dbg [[DBG189:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], 
i64 0, i64 0, !dbg [[DBG190:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX12]], i64 0, i64 0, !dbg [[DBG190]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG191:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG190]] +// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX13]], i64 0, i64 [[IDXPROM14]], !dbg [[DBG190]] +// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX15]], align 4, !dbg [[DBG192:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG193:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX16]], i64 0, i64 0, !dbg [[DBG193]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG194:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM18:%.*]] = sext i32 [[TMP21]] to i64, !dbg [[DBG193]] +// CHECK1-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX17]], i64 0, i64 [[IDXPROM18]], !dbg [[DBG193]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX19]], align 4, !dbg [[DBG193]] +// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG195:![0-9]+]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG196:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM21:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG195]] +// CHECK1-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX20]], i64 0, i64 [[IDXPROM21]], !dbg [[DBG195]] +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[ARRAYIDX22]], align 4, !dbg [[DBG197:![0-9]+]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i8, ptr [[TMP8]], align 1, !dbg [[DBG198:![0-9]+]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP24]] to i1, !dbg [[DBG198]] +// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG198]] +// 
CHECK1-NEXT: store i32 [[CONV]], ptr [[D]], align 4, !dbg [[DBG199:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG200:![0-9]+]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG173]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG168]] +// CHECK1-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP25]], 1, !dbg [[DBG165]] +// CHECK1-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG165]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG173]], !llvm.loop [[LOOP201:![0-9]+]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG173]] +// CHECK1: omp.dispatch.inc: +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG168]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG168]] +// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP26]], [[TMP27]], !dbg [[DBG165]] +// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG165]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG168]] +// CHECK1-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]], !dbg [[DBG165]] +// CHECK1-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG165]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG173]], !llvm.loop [[LOOP203:![0-9]+]] +// CHECK1: omp.dispatch.end: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB14:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG202:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG204:![0-9]+]] +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 
dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG205:![0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META206:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207:![0-9]+]] +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META208:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207]] +// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META209:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207]] +// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META210:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207]] +// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META211:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207]] +// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META212:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG213:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] 
= load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG213]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG213]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG213]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG213]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG213]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG213]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG213]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG213]] +// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG213]] +// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG213]] +// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG213]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined_debug__(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]]) #[[ATTR3]], !dbg [[DBG213]] +// CHECK1-NEXT: ret void, !dbg [[DBG213]] +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27 +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5]] !dbg [[DBG214:![0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata 
[[META217:![0-9]+]], metadata !DIExpression()), !dbg [[DBG218:![0-9]+]] +// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META219:![0-9]+]], metadata !DIExpression()), !dbg [[DBG218]] +// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META220:![0-9]+]], metadata !DIExpression()), !dbg [[DBG218]] +// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META221:![0-9]+]], metadata !DIExpression()), !dbg [[DBG218]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG222:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG222]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG222]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG222]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG222]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG222]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG222]] +// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr [[TMP3]] to ptr addrspace(1), !dbg [[DBG222]] +// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG222]] +// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG222]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug__(ptr addrspace(1) [[TMP7]], i32 [[TMP4]], ptr addrspace(1) [[TMP8]], ptr addrspace(1) [[TMP9]]) #[[ATTR3]], !dbg [[DBG222]] +// CHECK1-NEXT: ret void, !dbg [[DBG222]] +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug__ +// CHECK1-SAME: (ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias 
noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG223:![0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8 +// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8 +// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8 +// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META228:![0-9]+]], metadata !DIExpression()), !dbg [[DBG229:![0-9]+]] +// CHECK1-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META230:![0-9]+]], metadata !DIExpression()), !dbg [[DBG231:![0-9]+]] +// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META232:![0-9]+]], metadata !DIExpression()), !dbg [[DBG233:![0-9]+]] +// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META234:![0-9]+]], metadata !DIExpression()), !dbg [[DBG235:![0-9]+]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG236:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG236]] +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG236]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG236]] +// CHECK1-NEXT: [[TMP3:%.*]] = 
load ptr addrspace(1), ptr [[A_ADDR]], align 8, !dbg [[DBG236]] +// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG236]] +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG236]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG236]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG236]] +// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG236]] +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG236]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG236]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG236]] +// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG236]] +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG236]] +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG236]] +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB19:[0-9]+]], i8 2, i1 false), !dbg [[DBG236]] +// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG236]] +// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG236]] +// CHECK1: user_code.entry: +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB24:[0-9]+]]) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG237:![0-9]+]] +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG237]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG237]] +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP15]], align 8, !dbg [[DBG237]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg 
[[DBG237]] +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG237]] +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG237]] +// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG237]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB24]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG237]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB26:[0-9]+]], i8 2), !dbg [[DBG238:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG240:![0-9]+]] +// CHECK1: worker.exit: +// CHECK1-NEXT: ret void, !dbg [[DBG236]] +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined_debug__ +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG241:![0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8 +// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8 +// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8 +// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 
4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[G:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META244:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245:![0-9]+]] +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META246:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]] +// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META247:![0-9]+]], metadata !DIExpression()), !dbg [[DBG248:![0-9]+]] +// CHECK1-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META249:![0-9]+]], metadata !DIExpression()), !dbg [[DBG250:![0-9]+]] +// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META251:![0-9]+]], metadata !DIExpression()), !dbg [[DBG252:![0-9]+]] +// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META253:![0-9]+]], metadata !DIExpression()), !dbg [[DBG254:![0-9]+]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG255:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg 
[[DBG255]] +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[A_ADDR]], align 8, !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG255]] +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG255]] +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG255]] +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG255]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IV]], metadata [[META256:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_LB]], metadata [[META257:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]] +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG258:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_UB]], metadata [[META259:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]] +// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_STRIDE]], metadata [[META260:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]] +// CHECK1-NEXT: store i32 1, 
ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG258]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IS_LAST]], metadata [[META261:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]] +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG258]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META262:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]] +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !dbg [[DBG255]] +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB21:[0-9]+]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG263:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG255]] +// CHECK1: omp.dispatch.cond: +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 9, !dbg [[DBG258]] +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG258]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG258]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]] +// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG258]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ], !dbg [[DBG258]] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG258]] +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG258]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG258]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]] +// 
CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]], !dbg [[DBG255]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG255]] +// CHECK1: omp.dispatch.body: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG255]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG258]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]] +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]], !dbg [[DBG255]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG255]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG258]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1, !dbg [[DBG264:![0-9]+]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG264]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG264]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META265:![0-9]+]], metadata !DIExpression()), !dbg [[DBG267:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG268:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG268]] +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX7]], i64 0, i64 1, !dbg [[DBG268]] +// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[F]], align 8, !dbg [[DBG267]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META269:![0-9]+]], metadata !DIExpression()), !dbg [[DBG270:![0-9]+]] +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[G]], align 8, !dbg [[DBG270]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META271:![0-9]+]], metadata !DIExpression()), !dbg 
[[DBG272:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 1, !dbg [[DBG273:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX9]], i64 0, i64 1, !dbg [[DBG273]] +// CHECK1-NEXT: store ptr [[ARRAYIDX10]], ptr [[H]], align 8, !dbg [[DBG272]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META274:![0-9]+]], metadata !DIExpression()), !dbg [[DBG275:![0-9]+]] +// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG275]] +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4, !dbg [[DBG276:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG277:![0-9]+]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG278:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG277]] +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[IDXPROM]], !dbg [[DBG277]] +// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX12]], align 4, !dbg [[DBG279:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG280:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG280]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG281:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG280]] +// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG280]] +// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX16]], align 4, !dbg [[DBG282:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG283:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = 
getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX17]], i64 0, i64 0, !dbg [[DBG283]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG284:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP24]] to i64, !dbg [[DBG283]] +// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG283]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4, !dbg [[DBG283]] +// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG285:![0-9]+]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG286:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP26]] to i64, !dbg [[DBG285]] +// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG285]] +// CHECK1-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX23]], align 4, !dbg [[DBG287:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG288:![0-9]+]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG289:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM25:%.*]] = sext i32 [[TMP27]] to i64, !dbg [[DBG288]] +// CHECK1-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX24]], i64 0, i64 [[IDXPROM25]], !dbg [[DBG288]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX26]], align 4, !dbg [[DBG288]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0, !dbg [[DBG288]] +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8, !dbg [[DBG290:![0-9]+]] +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP11]], align 1, !dbg [[DBG290]] +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG291:![0-9]+]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG263]] +// CHECK1: omp.inner.for.inc: +// 
CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG258]] +// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[TMP29]], 1, !dbg [[DBG255]] +// CHECK1-NEXT: store i32 [[ADD27]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG255]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG263]], !llvm.loop [[LOOP292:![0-9]+]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG263]] +// CHECK1: omp.dispatch.inc: +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG258]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG258]] +// CHECK1-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP30]], [[TMP31]], !dbg [[DBG255]] +// CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG258]] +// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP32]], [[TMP33]], !dbg [[DBG255]] +// CHECK1-NEXT: store i32 [[ADD29]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG255]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG263]], !llvm.loop [[LOOP294:![0-9]+]] +// CHECK1: omp.dispatch.end: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB23:[0-9]+]], i32 [[TMP13]]), !dbg [[DBG293:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG295:![0-9]+]] +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG296:![0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: 
[[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META299:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300:![0-9]+]] +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META301:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300]] +// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META302:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300]] +// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META303:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300]] +// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META304:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300]] +// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META305:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG306:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG306]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG306]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG306]] +// CHECK1-NEXT: [[TMP4:%.*]] = 
load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG306]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG306]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG306]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG306]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG306]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG306]] +// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG306]] +// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG306]] +// CHECK1-NEXT: [[TMP12:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG306]] +// CHECK1-NEXT: [[TMP13:%.*]] = addrspacecast ptr [[TMP9]] to ptr addrspace(1), !dbg [[DBG306]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined_debug__(ptr [[TMP4]], ptr [[TMP5]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]], ptr addrspace(1) [[TMP12]], ptr addrspace(1) [[TMP13]]) #[[ATTR3]], !dbg [[DBG306]] +// CHECK1-NEXT: ret void, !dbg [[DBG306]] +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41 +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5]] !dbg [[DBG307:![0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META310:![0-9]+]], metadata 
!DIExpression()), !dbg [[DBG311:![0-9]+]] +// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META312:![0-9]+]], metadata !DIExpression()), !dbg [[DBG311]] +// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META313:![0-9]+]], metadata !DIExpression()), !dbg [[DBG311]] +// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META314:![0-9]+]], metadata !DIExpression()), !dbg [[DBG311]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG315:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG315]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG315]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG315]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG315]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG315]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG315]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG315]] +// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(1), !dbg [[DBG315]] +// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG315]] +// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG315]] +// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG315]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug__(ptr addrspace(1) [[TMP8]], ptr addrspace(1) [[TMP9]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]]) #[[ATTR3]], !dbg [[DBG315]] +// CHECK1-NEXT: ret void, !dbg [[DBG315]] +// diff --git 
a/clang/test/OpenMP/target_parallel_generic_loop_codegen.cpp b/clang/test/OpenMP/target_parallel_generic_loop_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/target_parallel_generic_loop_codegen.cpp @@ -0,0 +1,387 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ +// REQUIRES: amdgpu-registered-target + +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=IR-GPU + +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=IR + +// Check same results after serialization round-trip +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-pch -o %t %s +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR-PCH + +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +typedef void **omp_allocator_handle_t; +extern const omp_allocator_handle_t omp_null_allocator; +extern const omp_allocator_handle_t omp_default_mem_alloc; +extern const omp_allocator_handle_t omp_large_cap_mem_alloc; +extern const omp_allocator_handle_t omp_const_mem_alloc; +extern const omp_allocator_handle_t omp_high_bw_mem_alloc; +extern const omp_allocator_handle_t omp_low_lat_mem_alloc; +extern const omp_allocator_handle_t omp_cgroup_mem_alloc; +extern const omp_allocator_handle_t omp_pteam_mem_alloc; +extern const omp_allocator_handle_t omp_thread_mem_alloc; + +extern int omp_get_thread_num(void); 
+ +#define N 64 + +int main() { + int x = 0; + int device_result[N] = {0}; + + #pragma omp target parallel loop num_threads(N) uses_allocators(omp_pteam_mem_alloc) allocate(omp_pteam_mem_alloc: x) private(x) map(from: device_result) + for (int i = 0; i < N; i++) { + x = omp_get_thread_num(); + device_result[i] = i + x; + } +} +#endif +// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37 +// IR-GPU-SAME: (ptr noundef nonnull align 4 dereferenceable(256) [[DEVICE_RESULT:%.*]], ptr noundef [[OMP_PTEAM_MEM_ALLOC:%.*]]) #[[ATTR0:[0-9]+]] { +// IR-GPU-NEXT: entry: +// IR-GPU-NEXT: [[DEVICE_RESULT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[OMP_PTEAM_MEM_ALLOC_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5) +// IR-GPU-NEXT: [[DEVICE_RESULT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEVICE_RESULT_ADDR]] to ptr +// IR-GPU-NEXT: [[OMP_PTEAM_MEM_ALLOC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OMP_PTEAM_MEM_ALLOC_ADDR]] to ptr +// IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr +// IR-GPU-NEXT: store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[OMP_PTEAM_MEM_ALLOC]], ptr [[OMP_PTEAM_MEM_ALLOC_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr), i8 2, i1 false) +// IR-GPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// IR-GPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// IR-GPU: user_code.entry: +// IR-GPU-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) +// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr 
[[OMP_PTEAM_MEM_ALLOC_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 +// IR-GPU-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// IR-GPU-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1 +// IR-GPU-NEXT: store ptr [[TMP3]], ptr [[TMP5]], align 8 +// IR-GPU-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP2]], i32 1, i32 64, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2) +// IR-GPU-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 2) +// IR-GPU-NEXT: ret void +// IR-GPU: worker.exit: +// IR-GPU-NEXT: ret void +// +// +// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_omp_outlined +// IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(256) [[DEVICE_RESULT:%.*]], ptr noundef [[OMP_PTEAM_MEM_ALLOC:%.*]]) #[[ATTR1:[0-9]+]] { +// IR-GPU-NEXT: entry: +// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DEVICE_RESULT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[OMP_PTEAM_MEM_ALLOC_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[I:%.*]] = 
alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr +// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// IR-GPU-NEXT: [[DEVICE_RESULT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DEVICE_RESULT_ADDR]] to ptr +// IR-GPU-NEXT: [[OMP_PTEAM_MEM_ALLOC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OMP_PTEAM_MEM_ALLOC_ADDR]] to ptr +// IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr +// IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr +// IR-GPU-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr +// IR-GPU-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr +// IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr +// IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr +// IR-GPU-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr +// IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[OMP_PTEAM_MEM_ALLOC]], ptr [[OMP_PTEAM_MEM_ALLOC_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 63, ptr [[DOTOMP_UB_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// IR-GPU-NEXT: call void @__kmpc_for_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr), i32 [[TMP2]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1) +// IR-GPU-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// IR-GPU: omp.dispatch.cond: +// IR-GPU-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 63 +// IR-GPU-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR-GPU: cond.true: +// IR-GPU-NEXT: br label [[COND_END:%.*]] +// IR-GPU: cond.false: +// IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[COND_END]] +// IR-GPU: cond.end: +// IR-GPU-NEXT: [[COND:%.*]] = phi i32 [ 63, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// IR-GPU-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// IR-GPU-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// IR-GPU: omp.dispatch.body: +// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-GPU: omp.inner.for.cond: +// IR-GPU-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// IR-GPU-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR-GPU: omp.inner.for.body: +// IR-GPU-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 
+// IR-GPU-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// IR-GPU-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// IR-GPU-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4 +// IR-GPU-NEXT: [[CALL:%.*]] = call noundef i32 @_Z18omp_get_thread_numv() #[[ATTR5:[0-9]+]] +// IR-GPU-NEXT: store i32 [[CALL]], ptr addrspacecast (ptr addrspace(3) @x to ptr), align 4 +// IR-GPU-NEXT: [[TMP11:%.*]] = load i32, ptr [[I_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP12:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @x to ptr), align 4 +// IR-GPU-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// IR-GPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[I_ASCAST]], align 4 +// IR-GPU-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// IR-GPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [64 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// IR-GPU-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 +// IR-GPU-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR-GPU: omp.body.continue: +// IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR-GPU: omp.inner.for.inc: +// IR-GPU-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 +// IR-GPU-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR-GPU: omp.inner.for.end: +// IR-GPU-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// IR-GPU: omp.dispatch.inc: +// IR-GPU-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// IR-GPU-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// IR-GPU-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// IR-GPU-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// IR-GPU-NEXT: store i32 [[ADD6]], ptr 
[[DOTOMP_UB_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[OMP_DISPATCH_COND]] +// IR-GPU: omp.dispatch.end: +// IR-GPU-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP2]]) +// IR-GPU-NEXT: ret void +// +// +// IR-LABEL: define {{[^@]+}}@main +// IR-SAME: () #[[ATTR0:[0-9]+]] { +// IR-NEXT: entry: +// IR-NEXT: [[X:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DEVICE_RESULT:%.*]] = alloca [64 x i32], align 16 +// IR-NEXT: store i32 0, ptr [[X]], align 4 +// IR-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[DEVICE_RESULT]], i8 0, i64 256, i1 false) +// IR-NEXT: [[TMP0:%.*]] = load ptr, ptr @omp_pteam_mem_alloc, align 8 +// IR-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37(ptr [[DEVICE_RESULT]], ptr [[TMP0]]) #[[ATTR3:[0-9]+]] +// IR-NEXT: ret i32 0 +// +// +// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37 +// IR-SAME: (ptr noundef nonnull align 4 dereferenceable(256) [[DEVICE_RESULT:%.*]], ptr noundef [[OMP_PTEAM_MEM_ALLOC:%.*]]) #[[ATTR2:[0-9]+]] { +// IR-NEXT: entry: +// IR-NEXT: [[DEVICE_RESULT_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[OMP_PTEAM_MEM_ALLOC_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) +// IR-NEXT: store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR]], align 8 +// IR-NEXT: store ptr [[OMP_PTEAM_MEM_ALLOC]], ptr [[OMP_PTEAM_MEM_ALLOC_ADDR]], align 8 +// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR]], align 8 +// IR-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB2]], i32 [[TMP0]], i32 64) +// IR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OMP_PTEAM_MEM_ALLOC_ADDR]], align 8 +// IR-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37.omp_outlined, ptr [[TMP1]], ptr [[TMP2]]) +// IR-NEXT: ret void +// +// +// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37.omp_outlined +// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(256) [[DEVICE_RESULT:%.*]], ptr noundef [[OMP_PTEAM_MEM_ALLOC:%.*]]) #[[ATTR2]] { +// IR-NEXT: entry: +// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DEVICE_RESULT_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[OMP_PTEAM_MEM_ALLOC_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-NEXT: store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR]], align 8 +// IR-NEXT: store ptr [[OMP_PTEAM_MEM_ALLOC]], ptr [[OMP_PTEAM_MEM_ALLOC_ADDR]], align 8 +// IR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR]], align 8 +// IR-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// IR-NEXT: store i32 63, ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr @omp_pteam_mem_alloc, align 8 +// IR-NEXT: [[DOTX__VOID_ADDR:%.*]] = 
call ptr @__kmpc_alloc(i32 [[TMP2]], i64 4, ptr [[TMP3]]) +// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 63 +// IR-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR: cond.true: +// IR-NEXT: br label [[COND_END:%.*]] +// IR: cond.false: +// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: br label [[COND_END]] +// IR: cond.end: +// IR-NEXT: [[COND:%.*]] = phi i32 [ 63, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// IR-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR: omp.inner.for.cond: +// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// IR-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// IR: omp.inner.for.cond.cleanup: +// IR-NEXT: br label [[OMP_INNER_FOR_END:%.*]] +// IR: omp.inner.for.body: +// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// IR-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// IR-NEXT: [[CALL:%.*]] = call noundef i32 @_Z18omp_get_thread_numv() +// IR-NEXT: store i32 [[CALL]], ptr [[DOTX__VOID_ADDR]], align 4 +// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTX__VOID_ADDR]], align 4 +// IR-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], 
align 4 +// IR-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// IR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [64 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// IR-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4 +// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR: omp.body.continue: +// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR: omp.inner.for.inc: +// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// IR-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR: omp.inner.for.end: +// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR: omp.loop.exit: +// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// IR-NEXT: [[TMP14:%.*]] = load ptr, ptr @omp_pteam_mem_alloc, align 8 +// IR-NEXT: call void @__kmpc_free(i32 [[TMP2]], ptr [[DOTX__VOID_ADDR]], ptr [[TMP14]]) +// IR-NEXT: ret void +// +// +// IR-PCH-LABEL: define {{[^@]+}}@main +// IR-PCH-SAME: () #[[ATTR0:[0-9]+]] { +// IR-PCH-NEXT: entry: +// IR-PCH-NEXT: [[X:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DEVICE_RESULT:%.*]] = alloca [64 x i32], align 16 +// IR-PCH-NEXT: store i32 0, ptr [[X]], align 4 +// IR-PCH-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[DEVICE_RESULT]], i8 0, i64 256, i1 false) +// IR-PCH-NEXT: [[TMP0:%.*]] = load ptr, ptr @omp_pteam_mem_alloc, align 8 +// IR-PCH-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37(ptr [[DEVICE_RESULT]], ptr [[TMP0]]) #[[ATTR3:[0-9]+]] +// IR-PCH-NEXT: ret i32 0 +// +// +// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37 +// IR-PCH-SAME: (ptr noundef nonnull align 4 dereferenceable(256) [[DEVICE_RESULT:%.*]], ptr noundef [[OMP_PTEAM_MEM_ALLOC:%.*]]) #[[ATTR2:[0-9]+]] { +// IR-PCH-NEXT: entry: +// IR-PCH-NEXT: [[DEVICE_RESULT_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[OMP_PTEAM_MEM_ALLOC_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: 
[[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) +// IR-PCH-NEXT: store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[OMP_PTEAM_MEM_ALLOC]], ptr [[OMP_PTEAM_MEM_ALLOC_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR]], align 8 +// IR-PCH-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB2]], i32 [[TMP0]], i32 64) +// IR-PCH-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OMP_PTEAM_MEM_ALLOC_ADDR]], align 8 +// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37.omp_outlined, ptr [[TMP1]], ptr [[TMP2]]) +// IR-PCH-NEXT: ret void +// +// +// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37.omp_outlined +// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(256) [[DEVICE_RESULT:%.*]], ptr noundef [[OMP_PTEAM_MEM_ALLOC:%.*]]) #[[ATTR2]] { +// IR-PCH-NEXT: entry: +// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[DEVICE_RESULT_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[OMP_PTEAM_MEM_ALLOC_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[DEVICE_RESULT]], ptr [[DEVICE_RESULT_ADDR]], align 8 +// 
IR-PCH-NEXT: store ptr [[OMP_PTEAM_MEM_ALLOC]], ptr [[OMP_PTEAM_MEM_ALLOC_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DEVICE_RESULT_ADDR]], align 8 +// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// IR-PCH-NEXT: store i32 63, ptr [[DOTOMP_UB]], align 4 +// IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr @omp_pteam_mem_alloc, align 8 +// IR-PCH-NEXT: [[DOTX__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP2]], i64 4, ptr [[TMP3]]) +// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-PCH-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 63 +// IR-PCH-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR-PCH: cond.true: +// IR-PCH-NEXT: br label [[COND_END:%.*]] +// IR-PCH: cond.false: +// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-PCH-NEXT: br label [[COND_END]] +// IR-PCH: cond.end: +// IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ 63, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// IR-PCH-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-PCH: omp.inner.for.cond: +// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-PCH-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// IR-PCH-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// IR-PCH: omp.inner.for.cond.cleanup: +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_END:%.*]] +// IR-PCH: omp.inner.for.body: +// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// IR-PCH-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// IR-PCH-NEXT: [[CALL:%.*]] = call noundef i32 @_Z18omp_get_thread_numv() +// IR-PCH-NEXT: store i32 [[CALL]], ptr [[DOTX__VOID_ADDR]], align 4 +// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTX__VOID_ADDR]], align 4 +// IR-PCH-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// IR-PCH-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// IR-PCH-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [64 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// IR-PCH-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4 +// IR-PCH-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR-PCH: omp.body.continue: +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR-PCH: omp.inner.for.inc: +// IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// IR-PCH-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR-PCH: omp.inner.for.end: +// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR-PCH: omp.loop.exit: +// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// IR-PCH-NEXT: [[TMP14:%.*]] = load ptr, ptr @omp_pteam_mem_alloc, align 8 +// IR-PCH-NEXT: call void @__kmpc_free(i32 [[TMP2]], ptr [[DOTX__VOID_ADDR]], ptr [[TMP14]]) +// IR-PCH-NEXT: ret void +// diff --git a/clang/test/OpenMP/target_parallel_generic_loop_depend_codegen.cpp b/clang/test/OpenMP/target_parallel_generic_loop_depend_codegen.cpp new file mode 100644 
--- /dev/null +++ b/clang/test/OpenMP/target_parallel_generic_loop_depend_codegen.cpp @@ -0,0 +1,210 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ +// Test host codegen. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK + +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 
-include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s + +// Test target codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s 
--check-prefix TCHECK +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK + +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: 
%clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + + + + + +// Check target registration is registered as a Ctor. + + +template +struct TT{ + tx X; + ty Y; +}; + +int global; +extern int global; + +int foo(int n) { + int a = 0; + short aa = 0; + float b[10]; + float bn[n]; + double c[5][10]; + double cn[5][n]; + TT d; + static long *plocal; + + #pragma omp target parallel loop device(global + a) depend(in: global) depend(out: a, b, cn[4]) + for (int i = 0; i < 10; ++i) { + } + + + + + + + + #pragma omp target parallel loop device(global + a) nowait depend(inout: global, a, bn) if(target:a) + for (int i = 0; i < *plocal; ++i) { + static int local1; + *plocal = global; + local1 = global; + } + + #pragma omp target parallel loop if(0) firstprivate(global) depend(out:global) + for (int i = 0; i < global; ++i) { + global += 1; + } + + return a; +} + +// Check that the offloading functions are emitted and that the arguments are +// correct and loaded correctly for the target regions in foo(). + + + + + +// CHECK-64: [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32* +// CHECK-64: store i32 [[BP1_I32]], i32* [[BP1_CAST]], +// CHECK-32: store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]], + +// CHECK-64: [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32* +// CHECK-64: store i32 [[BP1_I32]], i32* [[BP1_CAST]], +// CHECK-32: store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]], + +// Create stack storage and store argument in there. 
+// CHECK-64: [[AA_CADDR:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i32* +// CHECK-64: load i32, i32* [[AA_CADDR]], align +// CHECK-32: load i32, i32* [[AA_ADDR]], align + +// CHECK-64: [[BP1_CAST:%.+]] = bitcast i[[SZ]]* [[BP1_PTR:%.+]] to i32* +// CHECK-64: store i32 [[BP1_I32]], i32* [[BP1_CAST]], +// CHECK-32: store i32 [[BP1_I32]], i32* [[BP1_PTR:%.+]], + + +#endif +// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l66 +// CHECK-SAME: () #[[ATTR2:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l66.omp_outlined) +// CHECK-NEXT: ret void +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK-SAME: () #[[ATTR8:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK-NEXT: ret void +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// TCHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l66 +// TCHECK-SAME: () #[[ATTR0:[0-9]+]] { +// TCHECK-NEXT: entry: +// TCHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l66.omp_outlined) +// TCHECK-NEXT: ret void +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +//// NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +// SIMD-ONLY0: {{.*}} +// SIMD-ONLY1: {{.*}} diff --git a/clang/test/OpenMP/target_parallel_generic_loop_uses_allocators_codegen.cpp b/clang/test/OpenMP/target_parallel_generic_loop_uses_allocators_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/target_parallel_generic_loop_uses_allocators_codegen.cpp @@ -0,0 +1,211 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ +// Test host codegen. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER +// Local declarations of the OpenMP allocator handle, trait-key and trait-value types used below; no header is included by this test. +enum omp_allocator_handle_t { + omp_null_allocator = 0, + omp_default_mem_alloc = 1, + omp_large_cap_mem_alloc = 2, + omp_const_mem_alloc = 3, + omp_high_bw_mem_alloc = 4, + omp_low_lat_mem_alloc = 5, + omp_cgroup_mem_alloc = 6, + omp_pteam_mem_alloc = 7, + omp_thread_mem_alloc = 8, + KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__ +}; + +typedef enum omp_alloctrait_key_t { omp_atk_sync_hint = 1, + omp_atk_alignment = 2, + omp_atk_access = 3, + omp_atk_pool_size = 4, + omp_atk_fallback = 5, + omp_atk_fb_data = 6, + omp_atk_pinned = 7, + omp_atk_partition = 8 +} omp_alloctrait_key_t; +typedef enum omp_alloctrait_value_t { + omp_atv_false = 0, + omp_atv_true = 1, + omp_atv_default = 2, + 
omp_atv_contended = 3, + omp_atv_uncontended = 4, + omp_atv_sequential = 5, + omp_atv_private = 6, + omp_atv_all = 7, + omp_atv_thread = 8, + omp_atv_pteam = 9, + omp_atv_cgroup = 10, + omp_atv_default_mem_fb = 11, + omp_atv_null_fb = 12, + omp_atv_abort_fb = 13, + omp_atv_allocator_fb = 14, + omp_atv_environment = 15, + omp_atv_nearest = 16, + omp_atv_blocked = 17, + omp_atv_interleaved = 18 +} omp_alloctrait_value_t; + +typedef struct omp_alloctrait_t { + omp_alloctrait_key_t key; + __UINTPTR_TYPE__ value; +} omp_alloctrait_t; + +// Just map the traits variable as a firstprivate variable. +// foo: offloads an empty 10-iteration 'target parallel loop' whose uses_allocators clause lists omp_null_allocator, omp_thread_mem_alloc and my_allocator(traits). +void foo() { + omp_alloctrait_t traits[10]; + omp_allocator_handle_t my_allocator; + +#pragma omp target parallel loop uses_allocators(omp_null_allocator, omp_thread_mem_alloc, my_allocator(traits)) + for (int i = 0; i < 10; ++i) + ; +} + + +// Destroy allocator upon exit from the region. + +#endif +// CHECK-LABEL: define {{[^@]+}}@_Z3foov +// CHECK-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TRAITS:%.*]] = alloca [10 x %struct.omp_alloctrait_t], align 8 +// CHECK-NEXT: [[MY_ALLOCATOR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 +// CHECK-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[TRAITS]], ptr [[TMP0]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[TRAITS]], ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x 
ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK-NEXT: store i32 2, ptr [[TMP5]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK-NEXT: store i64 0, ptr [[TMP13]], align 8 +// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CHECK-NEXT: [[TMP15:%.*]] = getelementptr 
inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP15]], align 4 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4 +// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK-NEXT: store i32 0, ptr [[TMP17]], align 4 +// CHECK-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66.region_id, ptr [[KERNEL_ARGS]]) +// CHECK-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK: omp_offload.failed: +// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66(ptr [[TRAITS]]) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK: omp_offload.cont: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66 +// CHECK-SAME: (ptr noundef nonnull align 8 dereferenceable(160) [[TRAITS:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TRAITS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[MY_ALLOCATOR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: store ptr [[TRAITS]], ptr [[TRAITS_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TRAITS_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_init_allocator(i32 [[TMP0]], ptr null, i32 10, ptr [[TMP1]]) +// CHECK-NEXT: [[CONV:%.*]] = ptrtoint ptr [[TMP2]] to i64 +// CHECK-NEXT: store i64 [[CONV]], ptr [[MY_ALLOCATOR]], align 8 +// CHECK-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66.omp_outlined) +// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[MY_ALLOCATOR]], align 8 +// CHECK-NEXT: [[CONV1:%.*]] = inttoptr i64 [[TMP3]] to ptr +// CHECK-NEXT: call void @__kmpc_destroy_allocator(i32 [[TMP0]], ptr [[CONV1]]) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66.omp_outlined +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-NEXT: br 
i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK: cond.true: +// CHECK-NEXT: br label [[COND_END:%.*]] +// CHECK: cond.false: +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: br label [[COND_END]] +// CHECK: cond.end: +// CHECK-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK: omp.inner.for.cond: +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK: omp.inner.for.body: +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK: omp.body.continue: +// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK: omp.inner.for.inc: +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK: omp.inner.for.end: +// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK: omp.loop.exit: +// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK-SAME: () #[[ATTR4:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @__tgt_register_requires(i64 1) +// 
CHECK-NEXT: ret void +// diff --git a/clang/test/OpenMP/target_teams_generic_loop_codegen-1.cpp b/clang/test/OpenMP/target_teams_generic_loop_codegen-1.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/target_teams_generic_loop_codegen-1.cpp @@ -0,0 +1,2664 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ +// expected-no-diagnostics +#ifndef HEADER +#define HEADER +// Test host codegen. +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK2 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK2 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK4 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK4 + +// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s 
--implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" + +// Test target codegen - host bc file has to be created first. 
(no significant differences with host version of target region) +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK10 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK10 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK12 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK12 + +// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -DCK1 
-verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" + +#ifdef CK1 + +// target_teams_fun: zero-fills a in a 'target teams loop' with num_teams(te) and thread_limit(th), then sets every element to g[0] in a second region marked is_device_ptr(g); returns a[0]. +int target_teams_fun(int *g){ + int n = 1000; + int a[1000]; + int te = n / 128; + int th = 128; + // discard n_addr + // discard capture expressions for te and th + + #pragma omp target teams loop num_teams(te), thread_limit(th) + for(int i = 0; i < n; i++) { + a[i] = 0; + } + + {{{ + #pragma omp target teams loop is_device_ptr(g) 
+ for(int i = 0; i < n; i++) { + a[i] = g[0]; + } + }}} + + // outlined target regions + + + + + return a[0]; +} + +#endif // CK1 +#endif // HEADER +// CHECK1-LABEL: define {{[^@]+}}@_Z16target_teams_funPi +// CHECK1-SAME: (ptr noundef [[G:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[A:%.*]] = alloca [1000 x i32], align 4 +// CHECK1-NEXT: [[TE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TH:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N_CASTED3:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 +// CHECK1-NEXT: store i32 1000, ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP0]], 128 +// CHECK1-NEXT: store i32 [[DIV]], ptr [[TE]], align 4 +// CHECK1-NEXT: store i32 128, ptr [[TH]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TE]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TH]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR__CASTED2]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED2]], align 8 +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51(i64 [[TMP4]], ptr [[A]], i64 [[TMP6]], i64 [[TMP8]]) #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[N_CASTED3]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[N_CASTED3]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[G_ADDR]], align 8 +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l57(i64 [[TMP10]], ptr [[A]], ptr [[TMP11]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[A]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: ret i32 [[TMP12]] +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51 +// CHECK1-SAME: (i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) +// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], 
align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 +// CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51.omp_outlined, i64 [[TMP5]], ptr [[TMP1]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, 
align 8 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK1: omp.precond.then: +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], 
[[TMP8]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51.omp_outlined.omp_outlined, i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]]) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP23]]) +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.end: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51.omp_outlined.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: 
[[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK1: omp.precond.then: +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 
+// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP6]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]]) +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.end: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l57 +// CHECK1-SAME: (i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: store i64 [[N]], ptr 
[[N_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[N_CASTED]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[G_ADDR]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l57.omp_outlined, i64 [[TMP2]], ptr [[TMP0]], ptr [[TMP3]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l57.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[N_CASTED:%.*]] = 
alloca i64, align 8 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK1: omp.precond.then: +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 
4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[G_ADDR]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l57.omp_outlined.omp_outlined, i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], ptr [[TMP20]]) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP24]]) +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.end: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l57.omp_outlined.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 
4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK1: omp.precond.then: +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: 
store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, 
ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[G_ADDR]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 0 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX7]], align 4 +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]]) +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.end: +// CHECK1-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@_Z16target_teams_funPi +// CHECK2-SAME: (ptr noundef [[G:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: 
[[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[A:%.*]] = alloca [1000 x i32], align 4 +// CHECK2-NEXT: [[TE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TH:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK2-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK2-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK2-NEXT: [[N_CASTED7:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [3 x ptr], align 8 +// CHECK2-NEXT: [[DOTOFFLOAD_PTRS9:%.*]] = alloca [3 x ptr], align 8 +// CHECK2-NEXT: [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [3 x ptr], align 8 +// CHECK2-NEXT: [[_TMP11:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_12:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_13:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[KERNEL_ARGS18:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK2-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 +// CHECK2-NEXT: store i32 1000, ptr [[N]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP0]], 128 +// CHECK2-NEXT: store i32 [[DIV]], ptr [[TE]], align 4 +// CHECK2-NEXT: store i32 128, ptr [[TH]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TE]], 
align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TH]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR__CASTED2]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED2]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK2-NEXT: store i64 [[TMP4]], ptr [[TMP9]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK2-NEXT: store i64 [[TMP4]], ptr [[TMP10]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK2-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[A]], ptr [[TMP12]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[A]], ptr [[TMP13]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK2-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK2-NEXT: 
store i64 [[TMP6]], ptr [[TMP15]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK2-NEXT: store i64 [[TMP6]], ptr [[TMP16]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK2-NEXT: store ptr null, ptr [[TMP17]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK2-NEXT: store i64 [[TMP8]], ptr [[TMP18]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK2-NEXT: store i64 [[TMP8]], ptr [[TMP19]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK2-NEXT: store ptr null, ptr [[TMP20]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP24]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK2-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB6:%.*]] = sub nsw i32 [[DIV5]], 1 +// CHECK2-NEXT: store i32 [[SUB6]], ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK2-NEXT: [[TMP27:%.*]] = zext i32 [[ADD]] to i64 +// CHECK2-NEXT: [[TMP28:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP23]], 0 +// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 0 +// CHECK2-NEXT: store i32 2, ptr [[TMP29]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK2-NEXT: store i32 4, ptr [[TMP30]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[TMP21]], ptr [[TMP31]], align 8 +// CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[TMP22]], ptr [[TMP32]], align 8 +// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK2-NEXT: store ptr @.offload_sizes, ptr [[TMP33]], align 8 +// CHECK2-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK2-NEXT: store ptr @.offload_maptypes, ptr [[TMP34]], align 8 +// CHECK2-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK2-NEXT: store ptr null, ptr [[TMP35]], align 8 +// CHECK2-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK2-NEXT: store ptr null, ptr [[TMP36]], align 8 +// CHECK2-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK2-NEXT: store i64 [[TMP27]], ptr [[TMP37]], align 8 +// CHECK2-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK2-NEXT: store i64 0, ptr [[TMP38]], align 8 +// CHECK2-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK2-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP39]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = getelementptr 
inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK2-NEXT: store [3 x i32] [[TMP28]], ptr [[TMP40]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK2-NEXT: store i32 0, ptr [[TMP41]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 -1, i32 [[TMP23]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51.region_id, ptr [[KERNEL_ARGS]]) +// CHECK2-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 +// CHECK2-NEXT: br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK2: omp_offload.failed: +// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51(i64 [[TMP4]], ptr [[A]], i64 [[TMP6]], i64 [[TMP8]]) #[[ATTR2:[0-9]+]] +// CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK2: omp_offload.cont: +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP44]], ptr [[N_CASTED7]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load i64, ptr [[N_CASTED7]], align 8 +// CHECK2-NEXT: [[TMP46:%.*]] = load ptr, ptr [[G_ADDR]], align 8 +// CHECK2-NEXT: [[TMP47:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 +// CHECK2-NEXT: store i64 [[TMP45]], ptr [[TMP47]], align 8 +// CHECK2-NEXT: [[TMP48:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 +// CHECK2-NEXT: store i64 [[TMP45]], ptr [[TMP48]], align 8 +// CHECK2-NEXT: [[TMP49:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS10]], i64 0, i64 0 +// CHECK2-NEXT: store ptr null, ptr [[TMP49]], align 8 +// CHECK2-NEXT: [[TMP50:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[A]], ptr [[TMP50]], align 8 +// CHECK2-NEXT: [[TMP51:%.*]] = getelementptr inbounds [3 x ptr], ptr 
[[DOTOFFLOAD_PTRS9]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[A]], ptr [[TMP51]], align 8 +// CHECK2-NEXT: [[TMP52:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS10]], i64 0, i64 1 +// CHECK2-NEXT: store ptr null, ptr [[TMP52]], align 8 +// CHECK2-NEXT: [[TMP53:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[TMP46]], ptr [[TMP53]], align 8 +// CHECK2-NEXT: [[TMP54:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[TMP46]], ptr [[TMP54]], align 8 +// CHECK2-NEXT: [[TMP55:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS10]], i64 0, i64 2 +// CHECK2-NEXT: store ptr null, ptr [[TMP55]], align 8 +// CHECK2-NEXT: [[TMP56:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP57:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP58:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP58]], ptr [[DOTCAPTURE_EXPR_12]], align 4 +// CHECK2-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_12]], align 4 +// CHECK2-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP59]], 0 +// CHECK2-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 +// CHECK2-NEXT: [[SUB16:%.*]] = sub nsw i32 [[DIV15]], 1 +// CHECK2-NEXT: store i32 [[SUB16]], ptr [[DOTCAPTURE_EXPR_13]], align 4 +// CHECK2-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_13]], align 4 +// CHECK2-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP60]], 1 +// CHECK2-NEXT: [[TMP61:%.*]] = zext i32 [[ADD17]] to i64 +// CHECK2-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 0 +// CHECK2-NEXT: store i32 2, ptr [[TMP62]], align 4 +// CHECK2-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 1 +// CHECK2-NEXT: store i32 3, ptr [[TMP63]], align 4 +// 
CHECK2-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[TMP56]], ptr [[TMP64]], align 8 +// CHECK2-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[TMP57]], ptr [[TMP65]], align 8 +// CHECK2-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 4 +// CHECK2-NEXT: store ptr @.offload_sizes.1, ptr [[TMP66]], align 8 +// CHECK2-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 5 +// CHECK2-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP67]], align 8 +// CHECK2-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 6 +// CHECK2-NEXT: store ptr null, ptr [[TMP68]], align 8 +// CHECK2-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 7 +// CHECK2-NEXT: store ptr null, ptr [[TMP69]], align 8 +// CHECK2-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 8 +// CHECK2-NEXT: store i64 [[TMP61]], ptr [[TMP70]], align 8 +// CHECK2-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 9 +// CHECK2-NEXT: store i64 0, ptr [[TMP71]], align 8 +// CHECK2-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 10 +// CHECK2-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP72]], align 4 +// CHECK2-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 11 +// CHECK2-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP73]], align 4 +// CHECK2-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS18]], i32 0, i32 12 +// CHECK2-NEXT: store i32 0, ptr [[TMP74]], align 4 +// CHECK2-NEXT: [[TMP75:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l57.region_id, ptr [[KERNEL_ARGS18]]) +// CHECK2-NEXT: [[TMP76:%.*]] = icmp ne i32 [[TMP75]], 0 +// CHECK2-NEXT: br i1 [[TMP76]], label [[OMP_OFFLOAD_FAILED19:%.*]], label [[OMP_OFFLOAD_CONT20:%.*]] +// CHECK2: omp_offload.failed19: +// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l57(i64 [[TMP45]], ptr [[A]], ptr [[TMP46]]) #[[ATTR2]] +// CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT20]] +// CHECK2: omp_offload.cont20: +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[A]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP77:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: ret i32 [[TMP77]] +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51 +// CHECK2-SAME: (i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) +// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 +// CHECK2-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51.omp_outlined, i64 [[TMP5]], ptr [[TMP1]]) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51.omp_outlined +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK2: omp.precond.then: +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: br i1 [[CMP4]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51.omp_outlined.omp_outlined, i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]]) +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP23]]) +// CHECK2-NEXT: br label [[OMP_PRECOND_END]] +// CHECK2: omp.precond.end: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51.omp_outlined.omp_outlined +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: 
[[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK2: omp.precond.then: +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 
+// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK2-NEXT: br i1 [[CMP6]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK2-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2: omp.body.continue: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]]) +// CHECK2-NEXT: br label [[OMP_PRECOND_END]] +// CHECK2: omp.precond.end: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l57 +// CHECK2-SAME: (i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: store i64 [[N]], ptr 
[[N_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[N_CASTED]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[G_ADDR]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l57.omp_outlined, i64 [[TMP2]], ptr [[TMP0]], ptr [[TMP3]]) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l57.omp_outlined +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[N_CASTED:%.*]] = 
alloca i64, align 8 +// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK2: omp.precond.then: +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 
4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[G_ADDR]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l57.omp_outlined.omp_outlined, i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], ptr [[TMP20]]) +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP24]]) +// CHECK2-NEXT: br label [[OMP_PRECOND_END]] +// CHECK2: omp.precond.end: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l57.omp_outlined.omp_outlined +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 
4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK2: omp.precond.then: +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: 
store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, 
ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[G_ADDR]], align 8 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 0 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK2-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX7]], align 4 +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2: omp.body.continue: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]]) +// CHECK2-NEXT: br label [[OMP_PRECOND_END]] +// CHECK2: omp.precond.end: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK2-SAME: () #[[ATTR3:[0-9]+]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: call void 
@__tgt_register_requires(i64 1) +// CHECK2-NEXT: ret void +// +// +// CHECK4-LABEL: define {{[^@]+}}@_Z16target_teams_funPi +// CHECK4-SAME: (ptr noundef [[G:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[A:%.*]] = alloca [1000 x i32], align 4 +// CHECK4-NEXT: [[TE:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[TH:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR__CASTED2:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 4 +// CHECK4-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4 +// CHECK4-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4 +// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK4-NEXT: [[N_CASTED7:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [3 x ptr], align 4 +// CHECK4-NEXT: [[DOTOFFLOAD_PTRS9:%.*]] = alloca [3 x ptr], align 4 +// CHECK4-NEXT: [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [3 x ptr], align 4 +// CHECK4-NEXT: [[_TMP11:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_12:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_13:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[KERNEL_ARGS18:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK4-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 +// CHECK4-NEXT: store i32 1000, ptr [[N]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load i32, ptr 
[[N]], align 4 +// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP0]], 128 +// CHECK4-NEXT: store i32 [[DIV]], ptr [[TE]], align 4 +// CHECK4-NEXT: store i32 128, ptr [[TH]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[TE]], align 4 +// CHECK4-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TH]], align 4 +// CHECK4-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[N]], align 4 +// CHECK4-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR__CASTED2]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED2]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK4-NEXT: store i32 [[TMP4]], ptr [[TMP9]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK4-NEXT: store i32 [[TMP4]], ptr [[TMP10]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK4-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK4-NEXT: store ptr [[A]], ptr [[TMP12]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK4-NEXT: store ptr [[A]], ptr [[TMP13]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x 
ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK4-NEXT: store ptr null, ptr [[TMP14]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK4-NEXT: store i32 [[TMP6]], ptr [[TMP15]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK4-NEXT: store i32 [[TMP6]], ptr [[TMP16]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK4-NEXT: store ptr null, ptr [[TMP17]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK4-NEXT: store i32 [[TMP8]], ptr [[TMP18]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK4-NEXT: store i32 [[TMP8]], ptr [[TMP19]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK4-NEXT: store ptr null, ptr [[TMP20]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[N]], align 4 +// CHECK4-NEXT: store i32 [[TMP24]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK4-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK4-NEXT: [[SUB6:%.*]] = sub nsw i32 [[DIV5]], 1 +// CHECK4-NEXT: store i32 [[SUB6]], ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK4-NEXT: 
[[TMP27:%.*]] = zext i32 [[ADD]] to i64 +// CHECK4-NEXT: [[TMP28:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP23]], 0 +// CHECK4-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK4-NEXT: store i32 2, ptr [[TMP29]], align 4 +// CHECK4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK4-NEXT: store i32 4, ptr [[TMP30]], align 4 +// CHECK4-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK4-NEXT: store ptr [[TMP21]], ptr [[TMP31]], align 4 +// CHECK4-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK4-NEXT: store ptr [[TMP22]], ptr [[TMP32]], align 4 +// CHECK4-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK4-NEXT: store ptr @.offload_sizes, ptr [[TMP33]], align 4 +// CHECK4-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK4-NEXT: store ptr @.offload_maptypes, ptr [[TMP34]], align 4 +// CHECK4-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK4-NEXT: store ptr null, ptr [[TMP35]], align 4 +// CHECK4-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK4-NEXT: store ptr null, ptr [[TMP36]], align 4 +// CHECK4-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK4-NEXT: store i64 [[TMP27]], ptr [[TMP37]], align 8 +// CHECK4-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK4-NEXT: store i64 0, ptr [[TMP38]], align 8 +// CHECK4-NEXT: 
[[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK4-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP39]], align 4 +// CHECK4-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK4-NEXT: store [3 x i32] [[TMP28]], ptr [[TMP40]], align 4 +// CHECK4-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK4-NEXT: store i32 0, ptr [[TMP41]], align 4 +// CHECK4-NEXT: [[TMP42:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 -1, i32 [[TMP23]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51.region_id, ptr [[KERNEL_ARGS]]) +// CHECK4-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 +// CHECK4-NEXT: br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK4: omp_offload.failed: +// CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51(i32 [[TMP4]], ptr [[A]], i32 [[TMP6]], i32 [[TMP8]]) #[[ATTR2:[0-9]+]] +// CHECK4-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK4: omp_offload.cont: +// CHECK4-NEXT: [[TMP44:%.*]] = load i32, ptr [[N]], align 4 +// CHECK4-NEXT: store i32 [[TMP44]], ptr [[N_CASTED7]], align 4 +// CHECK4-NEXT: [[TMP45:%.*]] = load i32, ptr [[N_CASTED7]], align 4 +// CHECK4-NEXT: [[TMP46:%.*]] = load ptr, ptr [[G_ADDR]], align 4 +// CHECK4-NEXT: [[TMP47:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 +// CHECK4-NEXT: store i32 [[TMP45]], ptr [[TMP47]], align 4 +// CHECK4-NEXT: [[TMP48:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 +// CHECK4-NEXT: store i32 [[TMP45]], ptr [[TMP48]], align 4 +// CHECK4-NEXT: [[TMP49:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 0 +// CHECK4-NEXT: store ptr null, ptr [[TMP49]], align 4 +// 
CHECK4-NEXT: [[TMP50:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 1 +// CHECK4-NEXT: store ptr [[A]], ptr [[TMP50]], align 4 +// CHECK4-NEXT: [[TMP51:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 1 +// CHECK4-NEXT: store ptr [[A]], ptr [[TMP51]], align 4 +// CHECK4-NEXT: [[TMP52:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 1 +// CHECK4-NEXT: store ptr null, ptr [[TMP52]], align 4 +// CHECK4-NEXT: [[TMP53:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 2 +// CHECK4-NEXT: store ptr [[TMP46]], ptr [[TMP53]], align 4 +// CHECK4-NEXT: [[TMP54:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 2 +// CHECK4-NEXT: store ptr [[TMP46]], ptr [[TMP54]], align 4 +// CHECK4-NEXT: [[TMP55:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 2 +// CHECK4-NEXT: store ptr null, ptr [[TMP55]], align 4 +// CHECK4-NEXT: [[TMP56:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP57:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP58:%.*]] = load i32, ptr [[N]], align 4 +// CHECK4-NEXT: store i32 [[TMP58]], ptr [[DOTCAPTURE_EXPR_12]], align 4 +// CHECK4-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_12]], align 4 +// CHECK4-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP59]], 0 +// CHECK4-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 +// CHECK4-NEXT: [[SUB16:%.*]] = sub nsw i32 [[DIV15]], 1 +// CHECK4-NEXT: store i32 [[SUB16]], ptr [[DOTCAPTURE_EXPR_13]], align 4 +// CHECK4-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_13]], align 4 +// CHECK4-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP60]], 1 +// CHECK4-NEXT: [[TMP61:%.*]] = zext i32 [[ADD17]] to i64 +// CHECK4-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 0 +// 
CHECK4-NEXT: store i32 2, ptr [[TMP62]], align 4 +// CHECK4-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 1 +// CHECK4-NEXT: store i32 3, ptr [[TMP63]], align 4 +// CHECK4-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 2 +// CHECK4-NEXT: store ptr [[TMP56]], ptr [[TMP64]], align 4 +// CHECK4-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 3 +// CHECK4-NEXT: store ptr [[TMP57]], ptr [[TMP65]], align 4 +// CHECK4-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 4 +// CHECK4-NEXT: store ptr @.offload_sizes.1, ptr [[TMP66]], align 4 +// CHECK4-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 5 +// CHECK4-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP67]], align 4 +// CHECK4-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 6 +// CHECK4-NEXT: store ptr null, ptr [[TMP68]], align 4 +// CHECK4-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 7 +// CHECK4-NEXT: store ptr null, ptr [[TMP69]], align 4 +// CHECK4-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 8 +// CHECK4-NEXT: store i64 [[TMP61]], ptr [[TMP70]], align 8 +// CHECK4-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 9 +// CHECK4-NEXT: store i64 0, ptr [[TMP71]], align 8 +// CHECK4-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 10 +// CHECK4-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP72]], align 4 +// CHECK4-NEXT: [[TMP73:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 11 +// CHECK4-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP73]], align 4 +// CHECK4-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 12 +// CHECK4-NEXT: store i32 0, ptr [[TMP74]], align 4 +// CHECK4-NEXT: [[TMP75:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l57.region_id, ptr [[KERNEL_ARGS18]]) +// CHECK4-NEXT: [[TMP76:%.*]] = icmp ne i32 [[TMP75]], 0 +// CHECK4-NEXT: br i1 [[TMP76]], label [[OMP_OFFLOAD_FAILED19:%.*]], label [[OMP_OFFLOAD_CONT20:%.*]] +// CHECK4: omp_offload.failed19: +// CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l57(i32 [[TMP45]], ptr [[A]], ptr [[TMP46]]) #[[ATTR2]] +// CHECK4-NEXT: br label [[OMP_OFFLOAD_CONT20]] +// CHECK4: omp_offload.cont20: +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[A]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP77:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK4-NEXT: ret i32 [[TMP77]] +// +// +// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51 +// CHECK4-SAME: (i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]], i32 noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) +// CHECK4-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// 
CHECK4-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK4-NEXT: store i32 [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 +// CHECK4-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51.omp_outlined, i32 [[TMP5]], ptr [[TMP1]]) +// CHECK4-NEXT: ret void +// +// +// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51.omp_outlined +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 
4 +// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK4-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK4-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK4: omp.precond.then: +// CHECK4-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK4: cond.true: +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: br label [[COND_END:%.*]] +// CHECK4: cond.false: +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: br label [[COND_END]] +// CHECK4: cond.end: +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK4: omp.inner.for.cond: +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4: omp.inner.for.body: +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51.omp_outlined.omp_outlined, i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], ptr [[TMP0]]) +// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK4: omp.inner.for.inc: +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK4-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK4: omp.inner.for.end: +// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK4: omp.loop.exit: +// CHECK4-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP21]]) +// CHECK4-NEXT: br label [[OMP_PRECOND_END]] +// CHECK4: omp.precond.end: +// CHECK4-NEXT: ret void +// +// +// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51.omp_outlined.omp_outlined +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: 
[[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK4-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK4-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK4-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK4-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK4: omp.precond.then: +// CHECK4-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 
+// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK4: cond.true: +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: br label [[COND_END:%.*]] +// CHECK4: cond.false: +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: br label [[COND_END]] +// CHECK4: cond.end: +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK4: omp.inner.for.cond: +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4: omp.inner.for.body: +// CHECK4-NEXT: 
[[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK4-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP17]] +// CHECK4-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 +// CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK4: omp.body.continue: +// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK4: omp.inner.for.inc: +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK4-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK4: omp.inner.for.end: +// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK4: omp.loop.exit: +// CHECK4-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]]) +// CHECK4-NEXT: br label [[OMP_PRECOND_END]] +// CHECK4: omp.precond.end: +// CHECK4-NEXT: ret void +// +// +// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l57 +// CHECK4-SAME: (i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK4-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[A_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP1]], ptr [[N_CASTED]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[G_ADDR]], align 4 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l57.omp_outlined, i32 [[TMP2]], ptr [[TMP0]], ptr [[TMP3]]) +// CHECK4-NEXT: ret void +// +// +// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l57.omp_outlined +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 
4 +// CHECK4-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK4-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK4-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK4: omp.precond.then: +// CHECK4-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK4: cond.true: +// 
CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: br label [[COND_END:%.*]] +// CHECK4: cond.false: +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: br label [[COND_END]] +// CHECK4: cond.end: +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK4: omp.inner.for.cond: +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4: omp.inner.for.body: +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load ptr, ptr [[G_ADDR]], align 4 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l57.omp_outlined.omp_outlined, i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], ptr [[TMP0]], ptr [[TMP18]]) +// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK4: omp.inner.for.inc: +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK4-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK4: omp.inner.for.end: +// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK4: omp.loop.exit: +// CHECK4-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]]) +// CHECK4-NEXT: br label [[OMP_PRECOND_END]] +// CHECK4: omp.precond.end: +// CHECK4-NEXT: ret void +// +// +// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l57.omp_outlined.omp_outlined +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 
4 +// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK4-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK4-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK4-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK4-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK4-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK4: omp.precond.then: +// CHECK4-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: 
store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK4: cond.true: +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: br label [[COND_END:%.*]] +// CHECK4: cond.false: +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: br label [[COND_END]] +// CHECK4: cond.end: +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK4: omp.inner.for.cond: +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK4-NEXT: br i1 
[[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4: omp.inner.for.body: +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK4-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load ptr, ptr [[G_ADDR]], align 4 +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK4-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP19]] +// CHECK4-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX6]], align 4 +// CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK4: omp.body.continue: +// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK4: omp.inner.for.inc: +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK4-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK4: omp.inner.for.end: +// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK4: omp.loop.exit: +// CHECK4-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]]) +// CHECK4-NEXT: br label [[OMP_PRECOND_END]] +// CHECK4: omp.precond.end: +// CHECK4-NEXT: ret void +// +// +// CHECK4-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK4-SAME: () #[[ATTR3:[0-9]+]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK4-NEXT: ret void +// +// +// CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51 +// CHECK10-SAME: (i64 
noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK10-NEXT: entry: +// CHECK10-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK10-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) +// CHECK10-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK10-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 +// CHECK10-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK10-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK10-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51.omp_outlined, i64 [[TMP5]], ptr [[TMP1]]) +// CHECK10-NEXT: ret void +// +// +// CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51.omp_outlined +// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0]] { +// CHECK10-NEXT: entry: +// CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK10-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK10-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK10-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK10-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 
4 +// CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK10-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK10-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK10-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK10-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK10: omp.precond.then: +// CHECK10-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK10-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK10: cond.true: +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: br label [[COND_END:%.*]] +// CHECK10: cond.false: +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: br label [[COND_END]] +// CHECK10: cond.end: +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK10-NEXT: store i32 [[COND]], ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK10: omp.inner.for.cond: +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK10: omp.inner.for.body: +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK10-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK10-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51.omp_outlined.omp_outlined, i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]]) +// CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK10: omp.inner.for.inc: +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK10-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK10: omp.inner.for.end: +// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK10: omp.loop.exit: +// CHECK10-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP23]]) +// CHECK10-NEXT: br label [[OMP_PRECOND_END]] +// CHECK10: omp.precond.end: +// CHECK10-NEXT: ret void +// +// +// CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51.omp_outlined.omp_outlined +// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0]] { +// CHECK10-NEXT: entry: +// CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK10-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// 
CHECK10-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK10-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK10-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK10-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK10-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK10-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK10-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK10-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK10-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK10-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK10: omp.precond.then: +// CHECK10-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load 
i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK10-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK10-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK10-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK10-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK10-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK10: cond.true: +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: br label [[COND_END:%.*]] +// CHECK10: cond.false: +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: br label [[COND_END]] +// CHECK10: cond.end: +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK10-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK10: omp.inner.for.cond: +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: 
[[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK10-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK10: omp.inner.for.body: +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK10-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK10-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 +// CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK10: omp.body.continue: +// CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK10: omp.inner.for.inc: +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK10-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK10: omp.inner.for.end: +// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK10: omp.loop.exit: +// CHECK10-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]]) +// CHECK10-NEXT: br label [[OMP_PRECOND_END]] +// CHECK10: omp.precond.end: +// CHECK10-NEXT: ret void +// +// +// CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l57 +// CHECK10-SAME: (i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR0]] { +// CHECK10-NEXT: entry: +// CHECK10-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK10-NEXT: [[G_ADDR:%.*]] = 
alloca ptr, align 8 +// CHECK10-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK10-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK10-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK10-NEXT: store i32 [[TMP1]], ptr [[N_CASTED]], align 4 +// CHECK10-NEXT: [[TMP2:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load ptr, ptr [[G_ADDR]], align 8 +// CHECK10-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l57.omp_outlined, i64 [[TMP2]], ptr [[TMP0]], ptr [[TMP3]]) +// CHECK10-NEXT: ret void +// +// +// CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l57.omp_outlined +// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR0]] { +// CHECK10-NEXT: entry: +// CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK10-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK10-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// 
CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK10-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK10-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK10-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK10-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK10-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK10-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK10-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK10-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK10: omp.precond.then: +// CHECK10-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK10-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK10: cond.true: +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: br label [[COND_END:%.*]] +// CHECK10: cond.false: +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: br label [[COND_END]] +// CHECK10: cond.end: +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK10-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK10: omp.inner.for.cond: +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK10: omp.inner.for.body: +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK10-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK10-NEXT: [[TMP20:%.*]] = load ptr, ptr [[G_ADDR]], align 8 +// CHECK10-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l57.omp_outlined.omp_outlined, i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], ptr [[TMP20]]) +// CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK10: omp.inner.for.inc: +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK10-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK10: omp.inner.for.end: +// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK10: omp.loop.exit: +// CHECK10-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP24]]) +// CHECK10-NEXT: br label [[OMP_PRECOND_END]] +// CHECK10: omp.precond.end: +// CHECK10-NEXT: ret void +// +// +// CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l57.omp_outlined.omp_outlined +// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR0]] { +// CHECK10-NEXT: entry: +// CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK10-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK10-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK10-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK10-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK10-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK10-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK10-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK10-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK10-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK10-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK10-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK10-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK10-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK10: omp.precond.then: +// CHECK10-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK10-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK10-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK10-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK10-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK10-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK10: cond.true: +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: br label [[COND_END:%.*]] +// CHECK10: cond.false: +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: br label [[COND_END]] +// CHECK10: cond.end: +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK10-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK10: omp.inner.for.cond: +// 
CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK10-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK10: omp.inner.for.body: +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK10-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load ptr, ptr [[G_ADDR]], align 8 +// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 0 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK10-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK10-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX7]], align 4 +// CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK10: omp.body.continue: +// CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK10: omp.inner.for.inc: +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK10-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK10: omp.inner.for.end: +// CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK10: omp.loop.exit: +// CHECK10-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]]) +// CHECK10-NEXT: br label [[OMP_PRECOND_END]] +// CHECK10: omp.precond.end: +// CHECK10-NEXT: ret void +// +// +// CHECK12-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51 +// CHECK12-SAME: (i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]], i32 noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK12-NEXT: entry: +// CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK12-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) +// CHECK12-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK12-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK12-NEXT: store i32 [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 +// CHECK12-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) +// CHECK12-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK12-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51.omp_outlined, i32 [[TMP5]], ptr [[TMP1]]) +// CHECK12-NEXT: ret void +// +// +// CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51.omp_outlined +// CHECK12-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0]] { +// CHECK12-NEXT: entry: +// CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK12-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK12-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 
4 +// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK12-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK12-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK12-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK12: omp.precond.then: +// CHECK12-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK12-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK12: cond.true: +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: br label [[COND_END:%.*]] +// CHECK12: cond.false: +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: br label [[COND_END]] +// CHECK12: cond.end: +// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK12-NEXT: store i32 [[COND]], ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK12: omp.inner.for.cond: +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK12: omp.inner.for.body: +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK12-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51.omp_outlined.omp_outlined, i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], ptr [[TMP0]]) +// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK12: omp.inner.for.inc: +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK12-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK12: omp.inner.for.end: +// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK12: omp.loop.exit: +// CHECK12-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP21]]) +// CHECK12-NEXT: br label [[OMP_PRECOND_END]] +// CHECK12: omp.precond.end: +// CHECK12-NEXT: ret void +// +// +// CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51.omp_outlined.omp_outlined +// CHECK12-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0]] { +// CHECK12-NEXT: entry: +// CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK12-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// 
CHECK12-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK12-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK12-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK12-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK12-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK12-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK12-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK12-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK12: omp.precond.then: +// CHECK12-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK12-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load 
i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK12-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK12: cond.true: +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: br label [[COND_END:%.*]] +// CHECK12: cond.false: +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: br label [[COND_END]] +// CHECK12: cond.end: +// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK12-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK12: omp.inner.for.cond: +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] +// CHECK12: omp.inner.for.body: +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK12-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP17]] +// CHECK12-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 +// CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK12: omp.body.continue: +// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK12: omp.inner.for.inc: +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK12-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK12: omp.inner.for.end: +// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK12: omp.loop.exit: +// CHECK12-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]]) +// CHECK12-NEXT: br label [[OMP_PRECOND_END]] +// CHECK12: omp.precond.end: +// CHECK12-NEXT: ret void +// +// +// CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l57 +// CHECK12-SAME: (i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR0]] { +// CHECK12-NEXT: entry: +// CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK12-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK12-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK12-NEXT: store ptr [[A]], ptr [[A_ADDR]], 
align 4 +// CHECK12-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP1]], ptr [[N_CASTED]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load ptr, ptr [[G_ADDR]], align 4 +// CHECK12-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l57.omp_outlined, i32 [[TMP2]], ptr [[TMP0]], ptr [[TMP3]]) +// CHECK12-NEXT: ret void +// +// +// CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l57.omp_outlined +// CHECK12-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR0]] { +// CHECK12-NEXT: entry: +// CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK12-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: store 
ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK12-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK12-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK12-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK12-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK12-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK12-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK12: omp.precond.then: +// CHECK12-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK12-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK12: cond.true: +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: br label [[COND_END:%.*]] +// CHECK12: cond.false: +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: br label [[COND_END]] +// CHECK12: cond.end: +// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK12-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK12: omp.inner.for.cond: +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK12: omp.inner.for.body: +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK12-NEXT: [[TMP18:%.*]] = load ptr, ptr [[G_ADDR]], align 4 +// CHECK12-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l57.omp_outlined.omp_outlined, i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], ptr [[TMP0]], ptr [[TMP18]]) +// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK12: omp.inner.for.inc: +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK12-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK12: omp.inner.for.end: +// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK12: omp.loop.exit: +// CHECK12-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]]) +// CHECK12-NEXT: br label [[OMP_PRECOND_END]] +// CHECK12: omp.precond.end: +// CHECK12-NEXT: ret void +// +// +// CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l57.omp_outlined.omp_outlined +// CHECK12-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR0]] { +// CHECK12-NEXT: entry: +// CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK12-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK12-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK12-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK12-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK12-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK12-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK12-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK12-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK12-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK12-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK12-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK12: omp.precond.then: +// CHECK12-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK12-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK12-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK12: cond.true: +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: br label [[COND_END:%.*]] +// CHECK12: cond.false: +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: br label [[COND_END]] +// CHECK12: cond.end: +// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK12-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK12: omp.inner.for.cond: +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK12: omp.inner.for.body: +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK12-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load ptr, ptr [[G_ADDR]], align 4 +// CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK12-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP19]] +// CHECK12-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX6]], align 4 +// CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK12: omp.body.continue: +// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK12: omp.inner.for.inc: +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK12-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK12: omp.inner.for.end: +// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK12: omp.loop.exit: +// CHECK12-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]]) +// CHECK12-NEXT: br label [[OMP_PRECOND_END]] +// CHECK12: omp.precond.end: +// CHECK12-NEXT: ret void +// diff --git a/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp new file mode 100644 --- /dev/null +++ 
b/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp @@ -0,0 +1,2688 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ +// REQUIRES: amdgpu-registered-target + +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=IR-GPU + +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=IR + +// Check same results after serialization round-trip +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-pch -o %t %s +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR-PCH + +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER +int foo() { + int i; + int j; + int sum[10][10]; + + #pragma omp target teams loop reduction(+:sum) collapse(2) \ + bind(parallel) order(concurrent) lastprivate(j) map(tofrom:sum) + for(i=0; i<10; i++) + for(j=0; j<10; j++) + sum[i][j] += i; + + return 0; +} +#endif +// IR-PCH-HOST-LABEL: define {{[^@]+}}@_Z3foov +// IR-PCH-HOST-SAME: () #[[ATTR0:[0-9]+]] { +// IR-PCH-HOST-NEXT: entry: +// IR-PCH-HOST-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-PCH-HOST-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-PCH-HOST-NEXT: [[SUM:%.*]] = alloca [10 x [10 x i32]], align 16 +// IR-PCH-HOST-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8 +// IR-PCH-HOST-NEXT: [[TMP0:%.*]] = load i32, ptr [[J]], align 4 +// IR-PCH-HOST-NEXT: store i32 [[TMP0]], ptr [[J_CASTED]], align 4 +// 
IR-PCH-HOST-NEXT: [[TMP1:%.*]] = load i64, ptr [[J_CASTED]], align 8 +// IR-PCH-HOST-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22(i64 [[TMP1]], ptr [[SUM]]) #[[ATTR2:[0-9]+]] +// IR-PCH-HOST-NEXT: ret i32 0 +// IR-PCH-HOST-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22 +// IR-PCH-HOST-SAME: (i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1:[0-9]+]] { +// IR-PCH-HOST-NEXT: entry: +// IR-PCH-HOST-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-HOST-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-HOST-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8 +// IR-PCH-HOST-NEXT: store i64 [[J]], ptr [[J_ADDR]], align 8 +// IR-PCH-HOST-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 +// IR-PCH-HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 +// IR-PCH-HOST-NEXT: [[TMP1:%.*]] = load i32, ptr [[J_ADDR]], align 4 +// IR-PCH-HOST-NEXT: store i32 [[TMP1]], ptr [[J_CASTED]], align 4 +// IR-PCH-HOST-NEXT: [[TMP2:%.*]] = load i64, ptr [[J_CASTED]], align 8 +// IR-PCH-HOST-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 2, ptr @.omp_outlined., i64 [[TMP2]], ptr [[TMP0]]) +// IR-PCH-HOST-NEXT: ret void +// IR-PCH-HOST-LABEL: define {{[^@]+}}@.omp_outlined. 
+// IR-PCH-HOST-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] { +// IR-PCH-HOST-NEXT: entry: +// IR-PCH-HOST-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-HOST-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-HOST-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-HOST-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-HOST-NEXT: [[SUM1:%.*]] = alloca [10 x [10 x i32]], align 16 +// IR-PCH-HOST-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-PCH-HOST-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-PCH-HOST-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// IR-PCH-HOST-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// IR-PCH-HOST-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// IR-PCH-HOST-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-PCH-HOST-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-PCH-HOST-NEXT: [[J3:%.*]] = alloca i32, align 4 +// IR-PCH-HOST-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-PCH-HOST-NEXT: [[J4:%.*]] = alloca i32, align 4 +// IR-PCH-HOST-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8 +// IR-PCH-HOST-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 +// IR-PCH-HOST-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-HOST-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-PCH-HOST-NEXT: store i64 [[J]], ptr [[J_ADDR]], align 8 +// IR-PCH-HOST-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 +// IR-PCH-HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 +// IR-PCH-HOST-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM1]], i32 0, i32 0, i32 0 +// IR-PCH-HOST-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 +// IR-PCH-HOST-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP1]] 
+// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] +// IR-PCH-HOST: omp.arrayinit.body: +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// IR-PCH-HOST-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP1]] +// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] +// IR-PCH-HOST: omp.arrayinit.done: +// IR-PCH-HOST-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-PCH-HOST-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-HOST-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// IR-PCH-HOST-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-PCH-HOST-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-HOST-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// IR-PCH-HOST-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-PCH-HOST-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-HOST-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// IR-PCH-HOST-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR-PCH-HOST: cond.true: +// IR-PCH-HOST-NEXT: br label [[COND_END:%.*]] +// IR-PCH-HOST: cond.false: +// IR-PCH-HOST-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-HOST-NEXT: br label [[COND_END]] +// IR-PCH-HOST: cond.end: +// IR-PCH-HOST-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// 
IR-PCH-HOST-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-HOST-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-PCH-HOST-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// IR-PCH-HOST-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-PCH-HOST: omp.inner.for.cond: +// IR-PCH-HOST-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-HOST-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-HOST-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// IR-PCH-HOST-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR-PCH-HOST: omp.inner.for.body: +// IR-PCH-HOST-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-PCH-HOST-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// IR-PCH-HOST-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-HOST-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// IR-PCH-HOST-NEXT: [[TMP13:%.*]] = load i32, ptr [[J3]], align 4 +// IR-PCH-HOST-NEXT: store i32 [[TMP13]], ptr [[J_CASTED]], align 4 +// IR-PCH-HOST-NEXT: [[TMP14:%.*]] = load i64, ptr [[J_CASTED]], align 8 +// IR-PCH-HOST-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 4, ptr @.omp_outlined..1, i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP14]], ptr [[SUM1]]) +// IR-PCH-HOST-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR-PCH-HOST: omp.inner.for.inc: +// IR-PCH-HOST-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-HOST-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// IR-PCH-HOST-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// IR-PCH-HOST-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// IR-PCH-HOST-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR-PCH-HOST: omp.inner.for.end: +// IR-PCH-HOST-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR-PCH-HOST: omp.loop.exit: +// IR-PCH-HOST-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-HOST-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// IR-PCH-HOST-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]]) +// IR-PCH-HOST-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-PCH-HOST-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// IR-PCH-HOST-NEXT: br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// IR-PCH-HOST: .omp.lastprivate.then: +// IR-PCH-HOST-NEXT: store i32 10, ptr [[J3]], align 4 +// IR-PCH-HOST-NEXT: [[TMP21:%.*]] = load i32, ptr [[J3]], align 4 +// IR-PCH-HOST-NEXT: store i32 [[TMP21]], ptr [[J_ADDR]], align 4 +// IR-PCH-HOST-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// IR-PCH-HOST: .omp.lastprivate.done: +// IR-PCH-HOST-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// IR-PCH-HOST-NEXT: store ptr [[SUM1]], ptr [[TMP22]], align 8 +// IR-PCH-HOST-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-HOST-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// IR-PCH-HOST-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3:[0-9]+]], i32 [[TMP24]], i32 1, i64 8, ptr 
[[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// IR-PCH-HOST-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// IR-PCH-HOST-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// IR-PCH-HOST-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// IR-PCH-HOST-NEXT: ] +// IR-PCH-HOST: .omp.reduction.case1: +// IR-PCH-HOST-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP26]] +// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// IR-PCH-HOST: omp.arraycpy.body: +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST6:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-PCH-HOST-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4 +// IR-PCH-HOST-NEXT: [[TMP28:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// IR-PCH-HOST-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// IR-PCH-HOST-NEXT: store i32 [[ADD7]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4 +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], i32 1 +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP26]] +// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] +// IR-PCH-HOST: omp.arraycpy.done10: +// IR-PCH-HOST-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP24]], ptr 
@.gomp_critical_user_.reduction.var) +// IR-PCH-HOST-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// IR-PCH-HOST: .omp.reduction.case2: +// IR-PCH-HOST-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq ptr [[TMP0]], [[TMP29]] +// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY11]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY12:%.*]] +// IR-PCH-HOST: omp.arraycpy.body12: +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY12]] ] +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY12]] ] +// IR-PCH-HOST-NEXT: [[TMP30:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], align 4 +// IR-PCH-HOST-NEXT: [[TMP31:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 [[TMP30]] monotonic, align 4 +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT16]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1 +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP29]] +// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY12]] +// IR-PCH-HOST: omp.arraycpy.done18: +// IR-PCH-HOST-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) +// IR-PCH-HOST-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// IR-PCH-HOST: .omp.reduction.default: +// IR-PCH-HOST-NEXT: ret void +// IR-PCH-HOST-LABEL: define {{[^@]+}}@.omp_outlined..1 +// IR-PCH-HOST-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], 
i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] { +// IR-PCH-HOST-NEXT: entry: +// IR-PCH-HOST-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-HOST-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-HOST-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-HOST-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-HOST-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-HOST-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-HOST-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-PCH-HOST-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-PCH-HOST-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// IR-PCH-HOST-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// IR-PCH-HOST-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// IR-PCH-HOST-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-PCH-HOST-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-PCH-HOST-NEXT: [[J3:%.*]] = alloca i32, align 4 +// IR-PCH-HOST-NEXT: [[SUM4:%.*]] = alloca [10 x [10 x i32]], align 16 +// IR-PCH-HOST-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-PCH-HOST-NEXT: [[J5:%.*]] = alloca i32, align 4 +// IR-PCH-HOST-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 +// IR-PCH-HOST-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-HOST-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-PCH-HOST-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// IR-PCH-HOST-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// IR-PCH-HOST-NEXT: store i64 [[J]], ptr [[J_ADDR]], align 8 +// IR-PCH-HOST-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 +// IR-PCH-HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 +// IR-PCH-HOST-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// IR-PCH-HOST-NEXT: store i32 99, ptr 
[[DOTOMP_UB]], align 4 +// IR-PCH-HOST-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// IR-PCH-HOST-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 +// IR-PCH-HOST-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// IR-PCH-HOST-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// IR-PCH-HOST-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// IR-PCH-HOST-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 +// IR-PCH-HOST-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// IR-PCH-HOST-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-PCH-HOST-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i32 0, i32 0, i32 0 +// IR-PCH-HOST-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 +// IR-PCH-HOST-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] +// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] +// IR-PCH-HOST: omp.arrayinit.body: +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// IR-PCH-HOST-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] +// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] +// IR-PCH-HOST: omp.arrayinit.done: +// IR-PCH-HOST-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-HOST-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// IR-PCH-HOST-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 
1, i32 1) +// IR-PCH-HOST-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-PCH-HOST-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 +// IR-PCH-HOST-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR-PCH-HOST: cond.true: +// IR-PCH-HOST-NEXT: br label [[COND_END:%.*]] +// IR-PCH-HOST: cond.false: +// IR-PCH-HOST-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-PCH-HOST-NEXT: br label [[COND_END]] +// IR-PCH-HOST: cond.end: +// IR-PCH-HOST-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// IR-PCH-HOST-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// IR-PCH-HOST-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// IR-PCH-HOST-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// IR-PCH-HOST-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-PCH-HOST: omp.inner.for.cond: +// IR-PCH-HOST-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] +// IR-PCH-HOST-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-HOST-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// IR-PCH-HOST-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR-PCH-HOST: omp.inner.for.body: +// IR-PCH-HOST-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-HOST-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 10 +// IR-PCH-HOST-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// IR-PCH-HOST-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// IR-PCH-HOST-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-HOST-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-HOST-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-HOST-NEXT: [[DIV7:%.*]] = sdiv i32 [[TMP13]], 10 +// IR-PCH-HOST-NEXT: 
[[MUL8:%.*]] = mul nsw i32 [[DIV7]], 10 +// IR-PCH-HOST-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], [[MUL8]] +// IR-PCH-HOST-NEXT: [[MUL9:%.*]] = mul nsw i32 [[SUB]], 1 +// IR-PCH-HOST-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] +// IR-PCH-HOST-NEXT: store i32 [[ADD10]], ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-HOST-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-HOST-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-HOST-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// IR-PCH-HOST-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i64 0, i64 [[IDXPROM]] +// IR-PCH-HOST-NEXT: [[TMP16:%.*]] = load i32, ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-HOST-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP16]] to i64 +// IR-PCH-HOST-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] +// IR-PCH-HOST-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-HOST-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], [[TMP14]] +// IR-PCH-HOST-NEXT: store i32 [[ADD13]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-HOST-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR-PCH-HOST: omp.body.continue: +// IR-PCH-HOST-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR-PCH-HOST: omp.inner.for.inc: +// IR-PCH-HOST-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-HOST-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP18]], 1 +// IR-PCH-HOST-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-HOST-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// IR-PCH-HOST: omp.inner.for.end: +// IR-PCH-HOST-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR-PCH-HOST: omp.loop.exit: +// IR-PCH-HOST-NEXT: [[TMP19:%.*]] = load ptr, 
ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-HOST-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// IR-PCH-HOST-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]]) +// IR-PCH-HOST-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// IR-PCH-HOST-NEXT: store ptr [[SUM4]], ptr [[TMP21]], align 8 +// IR-PCH-HOST-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-HOST-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// IR-PCH-HOST-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// IR-PCH-HOST-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// IR-PCH-HOST-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// IR-PCH-HOST-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// IR-PCH-HOST-NEXT: ] +// IR-PCH-HOST: .omp.reduction.case1: +// IR-PCH-HOST-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP25]] +// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// IR-PCH-HOST: omp.arraycpy.body: +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-PCH-HOST-NEXT: [[TMP26:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 +// IR-PCH-HOST-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// IR-PCH-HOST-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// IR-PCH-HOST-NEXT: store i32 
[[ADD16]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP25]] +// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]] +// IR-PCH-HOST: omp.arraycpy.done19: +// IR-PCH-HOST-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var) +// IR-PCH-HOST-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// IR-PCH-HOST: .omp.reduction.case2: +// IR-PCH-HOST-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_ISEMPTY20:%.*]] = icmp eq ptr [[TMP0]], [[TMP28]] +// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY20]], label [[OMP_ARRAYCPY_DONE27:%.*]], label [[OMP_ARRAYCPY_BODY21:%.*]] +// IR-PCH-HOST: omp.arraycpy.body21: +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST22:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY21]] ] +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST23:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY21]] ] +// IR-PCH-HOST-NEXT: [[TMP29:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], align 4 +// IR-PCH-HOST-NEXT: [[TMP30:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 [[TMP29]] monotonic, align 4 +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT24]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 1 +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT25]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], i32 1 +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq ptr 
[[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP28]] +// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY21]] +// IR-PCH-HOST: omp.arraycpy.done27: +// IR-PCH-HOST-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var) +// IR-PCH-HOST-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// IR-PCH-HOST: .omp.reduction.default: +// IR-PCH-HOST-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-PCH-HOST-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// IR-PCH-HOST-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// IR-PCH-HOST: .omp.lastprivate.then: +// IR-PCH-HOST-NEXT: store i32 10, ptr [[J3]], align 4 +// IR-PCH-HOST-NEXT: [[TMP33:%.*]] = load i32, ptr [[J3]], align 4 +// IR-PCH-HOST-NEXT: store i32 [[TMP33]], ptr [[J_ADDR]], align 4 +// IR-PCH-HOST-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// IR-PCH-HOST: .omp.lastprivate.done: +// IR-PCH-HOST-NEXT: ret void +// IR-PCH-HOST-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func +// IR-PCH-HOST-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { +// IR-PCH-HOST-NEXT: entry: +// IR-PCH-HOST-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-HOST-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 +// IR-PCH-HOST-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 +// IR-PCH-HOST-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 +// IR-PCH-HOST-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// IR-PCH-HOST-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// IR-PCH-HOST-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// IR-PCH-HOST-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// IR-PCH-HOST-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 +// IR-PCH-HOST-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// IR-PCH-HOST-NEXT: 
[[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100 +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] +// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// IR-PCH-HOST: omp.arraycpy.body: +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-PCH-HOST-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// IR-PCH-HOST-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// IR-PCH-HOST-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// IR-PCH-HOST-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] +// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// IR-PCH-HOST: omp.arraycpy.done2: +// IR-PCH-HOST-NEXT: ret void +// IR-PCH-HOST-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.2 +// IR-PCH-HOST-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] { +// IR-PCH-HOST-NEXT: entry: +// IR-PCH-HOST-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-HOST-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 +// IR-PCH-HOST-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 +// IR-PCH-HOST-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 +// IR-PCH-HOST-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// IR-PCH-HOST-NEXT: 
[[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// IR-PCH-HOST-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// IR-PCH-HOST-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// IR-PCH-HOST-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 +// IR-PCH-HOST-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// IR-PCH-HOST-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100 +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] +// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// IR-PCH-HOST: omp.arraycpy.body: +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-PCH-HOST-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// IR-PCH-HOST-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// IR-PCH-HOST-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// IR-PCH-HOST-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// IR-PCH-HOST-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] +// IR-PCH-HOST-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// IR-PCH-HOST: omp.arraycpy.done2: +// IR-PCH-HOST-NEXT: ret void +// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l23 +// CHECK-SAME: (i64 noundef [[J:%.*]], ptr 
noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[J_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_ADDR]] to ptr +// CHECK-NEXT: [[SUM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM_ADDR]] to ptr +// CHECK-NEXT: [[J_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_CASTED]] to ptr +// CHECK-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr +// CHECK-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr +// CHECK-NEXT: store i64 [[J]], ptr [[J_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr), i8 2, i1 false) +// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK: user_code.entry: +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[J_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[TMP3]], ptr [[J_CASTED_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[J_CASTED_ASCAST]], align 8 +// CHECK-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4 +// CHECK-NEXT: call void 
@__omp_outlined__(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 2) +// CHECK-NEXT: ret void +// CHECK: worker.exit: +// CHECK-NEXT: ret void +// CHECK-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[SUM1:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5) +// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[_TMP2:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[J3:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[J4:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8, addrspace(5) +// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[DOTGLOBAL_TID__ADDR]] to ptr +// CHECK-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// CHECK-NEXT: [[J_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_ADDR]] to ptr +// CHECK-NEXT: [[SUM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM_ADDR]] to ptr +// CHECK-NEXT: [[SUM1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM1]] to ptr +// CHECK-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr +// CHECK-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr +// CHECK-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP2]] to ptr +// CHECK-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr +// CHECK-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr +// CHECK-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr +// CHECK-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr +// CHECK-NEXT: [[J3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J3]] to ptr +// CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr +// CHECK-NEXT: [[J4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J4]] to ptr +// CHECK-NEXT: [[J_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_CASTED]] to ptr +// CHECK-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr +// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr +// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i64 [[J]], ptr [[J_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR_ASCAST]], align 8 +// CHECK-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM1_ASCAST]], i32 0, i32 0, i32 0 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 +// CHECK-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP1]] +// CHECK-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] +// CHECK: omp.arrayinit.body: +// CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP1]] +// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] +// CHECK: omp.arrayinit.done: +// CHECK-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// CHECK-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 +// CHECK-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-NEXT: call void @__kmpc_distribute_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr), i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = 
icmp sgt i32 [[TMP4]], 99 +// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK: cond.true: +// CHECK-NEXT: br label [[COND_END:%.*]] +// CHECK: cond.false: +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-NEXT: br label [[COND_END]] +// CHECK: cond.end: +// CHECK-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK: omp.inner.for.cond: +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// CHECK-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP7]], 100 +// CHECK-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK: omp.inner.for.body: +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[J3_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[TMP12]], ptr [[J_CASTED_ASCAST]], align 4 +// CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[J_CASTED_ASCAST]], align 8 +// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 +// CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP9]] to ptr +// CHECK-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1 +// CHECK-NEXT: [[TMP17:%.*]] = inttoptr i64 [[TMP11]] to ptr +// CHECK-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 +// CHECK-NEXT: [[TMP18:%.*]] = 
getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2 +// CHECK-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP13]] to ptr +// CHECK-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8 +// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3 +// CHECK-NEXT: store ptr [[SUM1_ASCAST]], ptr [[TMP20]], align 8 +// CHECK-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP22]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__.1, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 4) +// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK: omp.inner.for.inc: +// CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP29]], 99 +// CHECK-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] +// CHECK: cond.true9: +// CHECK-NEXT: br label 
[[COND_END11:%.*]] +// CHECK: cond.false10: +// CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-NEXT: br label [[COND_END11]] +// CHECK: cond.end11: +// CHECK-NEXT: [[COND12:%.*]] = phi i32 [ 99, [[COND_TRUE9]] ], [ [[TMP30]], [[COND_FALSE10]] ] +// CHECK-NEXT: store i32 [[COND12]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK: omp.inner.for.end: +// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK: omp.loop.exit: +// CHECK-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3:[0-9]+]] to ptr), i32 [[TMP33]]) +// CHECK-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 +// CHECK-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK-NEXT: br i1 [[TMP35]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK: .omp.lastprivate.then: +// CHECK-NEXT: store i32 10, ptr [[J3_ASCAST]], align 4 +// CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[J3_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[TMP36]], ptr [[J_ADDR_ASCAST]], align 4 +// CHECK-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK: .omp.lastprivate.done: +// CHECK-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 +// CHECK-NEXT: store ptr [[SUM1_ASCAST]], ptr [[TMP39]], align 8 +// CHECK-NEXT: [[TMP40:%.*]] = load ptr, ptr addrspace(1) @"_openmp_teams_reductions_buffer_$_$ptr", align 8 +// CHECK-NEXT: [[TMP41:%.*]] = call i32 
@__kmpc_nvptx_teams_reduce_nowait_v2(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP38]], ptr [[TMP40]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func.3, ptr @_omp_reduction_inter_warp_copy_func.4, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func) +// CHECK-NEXT: [[TMP42:%.*]] = icmp eq i32 [[TMP41]], 1 +// CHECK-NEXT: br i1 [[TMP42]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK: .omp.reduction.then: +// CHECK-NEXT: [[TMP43:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 +// CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP43]] +// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK: omp.arraycpy.body: +// CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM1_ASCAST]], [[DOTOMP_REDUCTION_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST13:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: [[TMP44:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], align 4 +// CHECK-NEXT: [[TMP45:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] +// CHECK-NEXT: store i32 [[ADD14]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], align 4 +// CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP43]] +// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label 
[[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY]] +// CHECK: omp.arraycpy.done17: +// CHECK-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP38]]) +// CHECK-NEXT: br label [[DOTOMP_REDUCTION_DONE]] +// CHECK: .omp.reduction.done: +// CHECK-NEXT: ret void +// CHECK-LABEL: define {{[^@]+}}@__omp_outlined__.1 +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[J3:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[SUM4:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5) +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[J5:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr 
addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr +// CHECK-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// CHECK-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr +// CHECK-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr +// CHECK-NEXT: [[J_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_ADDR]] to ptr +// CHECK-NEXT: [[SUM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM_ADDR]] to ptr +// CHECK-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr +// CHECK-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr +// CHECK-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr +// CHECK-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr +// CHECK-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr +// CHECK-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr +// CHECK-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr +// CHECK-NEXT: [[J3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J3]] to ptr +// CHECK-NEXT: [[SUM4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM4]] to ptr +// CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr +// CHECK-NEXT: [[J5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J5]] to ptr +// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr +// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i64 
[[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i64 [[J]], ptr [[J_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4 +// CHECK-NEXT: store i32 99, ptr [[DOTOMP_UB_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB_ASCAST]], align 4 +// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 +// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4_ASCAST]], i32 0, i32 0, i32 0 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 +// CHECK-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] +// CHECK-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] +// CHECK: omp.arrayinit.body: +// CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] +// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] +// CHECK: omp.arrayinit.done: +// CHECK-NEXT: 
[[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1) +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK: omp.inner.for.cond: +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] +// CHECK-NEXT: [[CONV6:%.*]] = sext i32 [[TMP7]] to i64 +// CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8, !llvm.access.group [[ACC_GRP7]] +// CHECK-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV6]], [[TMP8]] +// CHECK-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK: omp.inner.for.body: +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP9]], 10 +// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK-NEXT: [[DIV7:%.*]] = sdiv i32 [[TMP11]], 10 +// CHECK-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 10 +// CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], [[MUL8]] +// CHECK-NEXT: [[MUL9:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] +// CHECK-NEXT: store i32 [[ADD10]], 
ptr [[J3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4_ASCAST]], i64 0, i64 [[IDXPROM]] +// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[J3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] +// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP15]], [[TMP12]] +// CHECK-NEXT: store i32 [[ADD13]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK: omp.body.continue: +// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK: omp.inner.for.inc: +// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK: omp.inner.for.end: +// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK: omp.loop.exit: +// CHECK-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP19]]) +// 
CHECK-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 +// CHECK-NEXT: store ptr [[SUM4_ASCAST]], ptr [[TMP22]], align 8 +// CHECK-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP21]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func) +// CHECK-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP23]], 1 +// CHECK-NEXT: br i1 [[TMP24]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK: .omp.reduction.then: +// CHECK-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 +// CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP25]] +// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK: omp.arraycpy.body: +// CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM4_ASCAST]], [[DOTOMP_REDUCTION_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 +// CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK-NEXT: store i32 [[ADD16]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 +// CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr 
[[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP25]] +// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]] +// CHECK: omp.arraycpy.done19: +// CHECK-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP21]]) +// CHECK-NEXT: br label [[DOTOMP_REDUCTION_DONE]] +// CHECK: .omp.reduction.done: +// CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 +// CHECK-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK: .omp.lastprivate.then: +// CHECK-NEXT: store i32 10, ptr [[J3_ASCAST]], align 4 +// CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[J3_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[TMP30]], ptr [[J_ADDR_ASCAST]], align 4 +// CHECK-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK: .omp.lastprivate.done: +// CHECK-NEXT: ret void +// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func +// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5) +// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5) +// CHECK-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5) +// CHECK-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) +// CHECK-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5) +// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr +// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr +// CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast 
ptr addrspace(5) [[DOTADDR2]] to ptr +// CHECK-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr +// CHECK-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr +// CHECK-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr +// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 +// CHECK-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2 +// CHECK-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2 +// CHECK-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2 +// CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2 +// CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr [10 x [10 x i32]], ptr [[TMP9]], i64 1 +// CHECK-NEXT: br label [[DOTSHUFFLE_PRE_COND:%.*]] +// CHECK: .shuffle.pre_cond: +// CHECK-NEXT: [[TMP12:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[TMP23:%.*]], [[DOTSHUFFLE_THEN:%.*]] ] +// CHECK-NEXT: [[TMP13:%.*]] = phi ptr [ [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], [[ENTRY]] ], [ [[TMP24:%.*]], [[DOTSHUFFLE_THEN]] ] +// CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP11]] to i64 +// CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[TMP12]] to i64 +// CHECK-NEXT: [[TMP16:%.*]] = sub i64 [[TMP14]], [[TMP15]] +// CHECK-NEXT: [[TMP17:%.*]] = sdiv exact i64 [[TMP16]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK-NEXT: [[TMP18:%.*]] 
= icmp sgt i64 [[TMP17]], 7 +// CHECK-NEXT: br i1 [[TMP18]], label [[DOTSHUFFLE_THEN]], label [[DOTSHUFFLE_EXIT:%.*]] +// CHECK: .shuffle.then: +// CHECK-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP12]], align 4 +// CHECK-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16 +// CHECK-NEXT: [[TMP22:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP19]], i16 [[TMP6]], i16 [[TMP21]]) +// CHECK-NEXT: store i64 [[TMP22]], ptr [[TMP13]], align 4 +// CHECK-NEXT: [[TMP23]] = getelementptr i64, ptr [[TMP12]], i64 1 +// CHECK-NEXT: [[TMP24]] = getelementptr i64, ptr [[TMP13]], i64 1 +// CHECK-NEXT: br label [[DOTSHUFFLE_PRE_COND]] +// CHECK: .shuffle.exit: +// CHECK-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 8 +// CHECK-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP7]], 0 +// CHECK-NEXT: [[TMP26:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK-NEXT: [[TMP27:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] +// CHECK-NEXT: [[TMP28:%.*]] = and i1 [[TMP26]], [[TMP27]] +// CHECK-NEXT: [[TMP29:%.*]] = icmp eq i16 [[TMP7]], 2 +// CHECK-NEXT: [[TMP30:%.*]] = and i16 [[TMP5]], 1 +// CHECK-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP30]], 0 +// CHECK-NEXT: [[TMP32:%.*]] = and i1 [[TMP29]], [[TMP31]] +// CHECK-NEXT: [[TMP33:%.*]] = icmp sgt i16 [[TMP6]], 0 +// CHECK-NEXT: [[TMP34:%.*]] = and i1 [[TMP32]], [[TMP33]] +// CHECK-NEXT: [[TMP35:%.*]] = or i1 [[TMP25]], [[TMP28]] +// CHECK-NEXT: [[TMP36:%.*]] = or i1 [[TMP35]], [[TMP34]] +// CHECK-NEXT: br i1 [[TMP36]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK: then: +// CHECK-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR2]] +// CHECK-NEXT: br label [[IFCONT:%.*]] +// CHECK: else: +// CHECK-NEXT: br label [[IFCONT]] +// CHECK: ifcont: +// CHECK-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK-NEXT: [[TMP38:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] +// CHECK-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], 
[[TMP38]] +// CHECK-NEXT: br i1 [[TMP39]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK: then4: +// CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0 +// CHECK-NEXT: [[TMP41:%.*]] = load ptr, ptr [[TMP40]], align 8 +// CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP43]], ptr align 4 [[TMP41]], i64 400, i1 false) +// CHECK-NEXT: br label [[IFCONT6:%.*]] +// CHECK: else5: +// CHECK-NEXT: br label [[IFCONT6]] +// CHECK: ifcont6: +// CHECK-NEXT: ret void +// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func +// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) +// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr +// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr +// CHECK-NEXT: [[DOTCNT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCNT_ADDR]] to ptr +// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 63 +// CHECK-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 
[[TMP5]], 6 +// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 0, ptr [[DOTCNT_ADDR_ASCAST]], align 4 +// CHECK-NEXT: br label [[PRECOND:%.*]] +// CHECK: precond: +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCNT_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 100 +// CHECK-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]] +// CHECK: body: +// CHECK-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4:[0-9]+]] to ptr), i32 [[TMP2]]) +// CHECK-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK: then: +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 [[TMP7]] +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK-NEXT: store volatile i32 [[TMP13]], ptr addrspace(3) [[TMP12]], align 4 +// CHECK-NEXT: br label [[IFCONT:%.*]] +// CHECK: else: +// CHECK-NEXT: br label [[IFCONT]] +// CHECK: ifcont: +// CHECK-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4]] to ptr), i32 [[TMP2]]) +// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 +// CHECK-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]] +// CHECK-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] +// CHECK: then2: +// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 +// 
CHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]] +// CHECK-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4 +// CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 +// CHECK-NEXT: br label [[IFCONT4:%.*]] +// CHECK: else3: +// CHECK-NEXT: br label [[IFCONT4]] +// CHECK: ifcont4: +// CHECK-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR_ASCAST]], align 4 +// CHECK-NEXT: br label [[PRECOND]] +// CHECK: exit: +// CHECK-NEXT: ret void +// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func.3 +// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5) +// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5) +// CHECK-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5) +// CHECK-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) +// CHECK-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5) +// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr +// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr +// CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr +// CHECK-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr +// CHECK-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr +// CHECK-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr +// 
CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 +// CHECK-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2 +// CHECK-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2 +// CHECK-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2 +// CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2 +// CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr [10 x [10 x i32]], ptr [[TMP9]], i64 1 +// CHECK-NEXT: br label [[DOTSHUFFLE_PRE_COND:%.*]] +// CHECK: .shuffle.pre_cond: +// CHECK-NEXT: [[TMP12:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[TMP23:%.*]], [[DOTSHUFFLE_THEN:%.*]] ] +// CHECK-NEXT: [[TMP13:%.*]] = phi ptr [ [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], [[ENTRY]] ], [ [[TMP24:%.*]], [[DOTSHUFFLE_THEN]] ] +// CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP11]] to i64 +// CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[TMP12]] to i64 +// CHECK-NEXT: [[TMP16:%.*]] = sub i64 [[TMP14]], [[TMP15]] +// CHECK-NEXT: [[TMP17:%.*]] = sdiv exact i64 [[TMP16]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK-NEXT: [[TMP18:%.*]] = icmp sgt i64 [[TMP17]], 7 +// CHECK-NEXT: br i1 [[TMP18]], label [[DOTSHUFFLE_THEN]], label [[DOTSHUFFLE_EXIT:%.*]] +// CHECK: .shuffle.then: +// CHECK-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP12]], align 4 +// CHECK-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16 +// CHECK-NEXT: [[TMP22:%.*]] = call i64 @__kmpc_shuffle_int64(i64 
[[TMP19]], i16 [[TMP6]], i16 [[TMP21]]) +// CHECK-NEXT: store i64 [[TMP22]], ptr [[TMP13]], align 4 +// CHECK-NEXT: [[TMP23]] = getelementptr i64, ptr [[TMP12]], i64 1 +// CHECK-NEXT: [[TMP24]] = getelementptr i64, ptr [[TMP13]], i64 1 +// CHECK-NEXT: br label [[DOTSHUFFLE_PRE_COND]] +// CHECK: .shuffle.exit: +// CHECK-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 8 +// CHECK-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP7]], 0 +// CHECK-NEXT: [[TMP26:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK-NEXT: [[TMP27:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] +// CHECK-NEXT: [[TMP28:%.*]] = and i1 [[TMP26]], [[TMP27]] +// CHECK-NEXT: [[TMP29:%.*]] = icmp eq i16 [[TMP7]], 2 +// CHECK-NEXT: [[TMP30:%.*]] = and i16 [[TMP5]], 1 +// CHECK-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP30]], 0 +// CHECK-NEXT: [[TMP32:%.*]] = and i1 [[TMP29]], [[TMP31]] +// CHECK-NEXT: [[TMP33:%.*]] = icmp sgt i16 [[TMP6]], 0 +// CHECK-NEXT: [[TMP34:%.*]] = and i1 [[TMP32]], [[TMP33]] +// CHECK-NEXT: [[TMP35:%.*]] = or i1 [[TMP25]], [[TMP28]] +// CHECK-NEXT: [[TMP36:%.*]] = or i1 [[TMP35]], [[TMP34]] +// CHECK-NEXT: br i1 [[TMP36]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK: then: +// CHECK-NEXT: call void @"_omp$reduction$reduction_func.2"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR2]] +// CHECK-NEXT: br label [[IFCONT:%.*]] +// CHECK: else: +// CHECK-NEXT: br label [[IFCONT]] +// CHECK: ifcont: +// CHECK-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK-NEXT: [[TMP38:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] +// CHECK-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] +// CHECK-NEXT: br i1 [[TMP39]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK: then4: +// CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0 +// CHECK-NEXT: [[TMP41:%.*]] = load ptr, ptr [[TMP40]], align 8 +// CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// 
CHECK-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP43]], ptr align 4 [[TMP41]], i64 400, i1 false) +// CHECK-NEXT: br label [[IFCONT6:%.*]] +// CHECK: else5: +// CHECK-NEXT: br label [[IFCONT6]] +// CHECK: ifcont6: +// CHECK-NEXT: ret void +// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func.4 +// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) +// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr +// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr +// CHECK-NEXT: [[DOTCNT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCNT_ADDR]] to ptr +// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 63 +// CHECK-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 6 +// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 0, ptr [[DOTCNT_ADDR_ASCAST]], align 4 +// CHECK-NEXT: br label [[PRECOND:%.*]] +// CHECK: precond: +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCNT_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 100 +// CHECK-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], 
label [[EXIT:%.*]] +// CHECK: body: +// CHECK-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4]] to ptr), i32 [[TMP2]]) +// CHECK-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK: then: +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 [[TMP7]] +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK-NEXT: store volatile i32 [[TMP13]], ptr addrspace(3) [[TMP12]], align 4 +// CHECK-NEXT: br label [[IFCONT:%.*]] +// CHECK: else: +// CHECK-NEXT: br label [[IFCONT]] +// CHECK: ifcont: +// CHECK-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4]] to ptr), i32 [[TMP2]]) +// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 +// CHECK-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]] +// CHECK-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] +// CHECK: then2: +// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 +// CHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]] +// CHECK-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4 +// CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 +// CHECK-NEXT: br label [[IFCONT4:%.*]] +// CHECK: else3: +// CHECK-NEXT: br label [[IFCONT4]] +// CHECK: ifcont4: +// 
CHECK-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR_ASCAST]], align 4 +// CHECK-NEXT: br label [[PRECOND]] +// CHECK: exit: +// CHECK-NEXT: ret void +// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func +// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr +// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr +// CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr +// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP5]] +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP8]], ptr align 4 [[TMP7]], i64 400, i1 false) +// CHECK-NEXT: ret void +// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func +// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr 
noundef [[TMP2:%.*]]) #[[ATTR3]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) +// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr +// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr +// CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr +// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr +// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 +// CHECK-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP4]] +// CHECK-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 +// CHECK-NEXT: call void @"_omp$reduction$reduction_func.2"(ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr [[TMP7]]) #[[ATTR2]] +// CHECK-NEXT: ret void +// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func +// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca 
ptr, align 8, addrspace(5) +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr +// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr +// CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr +// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP5]] +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP7]], ptr align 128 [[TMP8]], i64 400, i1 false) +// CHECK-NEXT: ret void +// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func +// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) +// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr +// 
CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr +// CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr +// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr +// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 +// CHECK-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP4]] +// CHECK-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 +// CHECK-NEXT: call void @"_omp$reduction$reduction_func.2"(ptr [[TMP7]], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]]) #[[ATTR2]] +// CHECK-NEXT: ret void +// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22 +// IR-GPU-SAME: (i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR0:[0-9]+]] { +// IR-GPU-NEXT: entry: +// IR-GPU-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[J_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_ADDR]] to ptr +// 
IR-GPU-NEXT: [[SUM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM_ADDR]] to ptr +// IR-GPU-NEXT: [[J_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_CASTED]] to ptr +// IR-GPU-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr +// IR-GPU-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr +// IR-GPU-NEXT: store i64 [[J]], ptr [[J_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr), i8 2, i1 false) +// IR-GPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 +// IR-GPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// IR-GPU: user_code.entry: +// IR-GPU-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) +// IR-GPU-NEXT: [[TMP3:%.*]] = load i32, ptr [[J_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP3]], ptr [[J_CASTED_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP4:%.*]] = load i64, ptr [[J_CASTED_ASCAST]], align 8 +// IR-GPU-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4 +// IR-GPU-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2:[0-9]+]] +// IR-GPU-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 2) +// IR-GPU-NEXT: ret void +// IR-GPU: worker.exit: +// IR-GPU-NEXT: ret void +// +// +// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22_omp_outlined +// IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr 
noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1:[0-9]+]] { +// IR-GPU-NEXT: entry: +// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[SUM1:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[_TMP2:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[J3:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[J4:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr +// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// IR-GPU-NEXT: [[J_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_ADDR]] to ptr +// IR-GPU-NEXT: [[SUM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM_ADDR]] to ptr +// IR-GPU-NEXT: [[SUM1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM1]] to ptr +// 
IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr +// IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr +// IR-GPU-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP2]] to ptr +// IR-GPU-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr +// IR-GPU-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr +// IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr +// IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr +// IR-GPU-NEXT: [[J3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J3]] to ptr +// IR-GPU-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr +// IR-GPU-NEXT: [[J4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J4]] to ptr +// IR-GPU-NEXT: [[J_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_CASTED]] to ptr +// IR-GPU-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr +// IR-GPU-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr +// IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[J]], ptr [[J_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM1_ASCAST]], i32 0, i32 0, i32 0 +// IR-GPU-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 +// IR-GPU-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP1]] +// IR-GPU-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], 
label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] +// IR-GPU: omp.arrayinit.body: +// IR-GPU-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// IR-GPU-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// IR-GPU-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// IR-GPU-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP1]] +// IR-GPU-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] +// IR-GPU: omp.arrayinit.done: +// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 +// IR-GPU-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// IR-GPU-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// IR-GPU-NEXT: call void @__kmpc_distribute_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr), i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// IR-GPU-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR-GPU: cond.true: +// IR-GPU-NEXT: br label [[COND_END:%.*]] +// IR-GPU: cond.false: +// IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[COND_END]] +// IR-GPU: cond.end: +// IR-GPU-NEXT: [[COND:%.*]] = phi 
i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// IR-GPU-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-GPU: omp.inner.for.cond: +// IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP7]], 100 +// IR-GPU-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR-GPU: omp.inner.for.body: +// IR-GPU-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// IR-GPU-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// IR-GPU-NEXT: [[TMP12:%.*]] = load i32, ptr [[J3_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP12]], ptr [[J_CASTED_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP13:%.*]] = load i64, ptr [[J_CASTED_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 +// IR-GPU-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP9]] to ptr +// IR-GPU-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8 +// IR-GPU-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1 +// IR-GPU-NEXT: [[TMP17:%.*]] = inttoptr i64 [[TMP11]] to ptr +// IR-GPU-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 +// IR-GPU-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2 +// IR-GPU-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP13]] to ptr +// IR-GPU-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8 +// IR-GPU-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3 +// IR-GPU-NEXT: store ptr 
[[SUM1_ASCAST]], ptr [[TMP20]], align 8 +// IR-GPU-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// IR-GPU-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP22]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 4) +// IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR-GPU: omp.inner.for.inc: +// IR-GPU-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// IR-GPU-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// IR-GPU-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// IR-GPU-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// IR-GPU-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// IR-GPU-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// IR-GPU-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP29]], 99 +// IR-GPU-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] +// IR-GPU: cond.true9: +// IR-GPU-NEXT: br label [[COND_END11:%.*]] +// IR-GPU: cond.false10: +// IR-GPU-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[COND_END11]] +// IR-GPU: cond.end11: +// IR-GPU-NEXT: [[COND12:%.*]] = phi i32 [ 99, [[COND_TRUE9]] ], [ 
[[TMP30]], [[COND_FALSE10]] ] +// IR-GPU-NEXT: store i32 [[COND12]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR-GPU: omp.inner.for.end: +// IR-GPU-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR-GPU: omp.loop.exit: +// IR-GPU-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// IR-GPU-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3:[0-9]+]] to ptr), i32 [[TMP33]]) +// IR-GPU-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// IR-GPU-NEXT: br i1 [[TMP35]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// IR-GPU: .omp.lastprivate.then: +// IR-GPU-NEXT: store i32 10, ptr [[J3_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP36:%.*]] = load i32, ptr [[J3_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP36]], ptr [[J_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// IR-GPU: .omp.lastprivate.done: +// IR-GPU-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// IR-GPU-NEXT: [[TMP39:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 +// IR-GPU-NEXT: store ptr [[SUM1_ASCAST]], ptr [[TMP39]], align 8 +// IR-GPU-NEXT: [[TMP40:%.*]] = load ptr, ptr addrspace(1) @"_openmp_teams_reductions_buffer_$_$ptr", align 8 +// IR-GPU-NEXT: [[TMP41:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP38]], ptr [[TMP40]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func.1, ptr 
@_omp_reduction_inter_warp_copy_func.2, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func) +// IR-GPU-NEXT: [[TMP42:%.*]] = icmp eq i32 [[TMP41]], 1 +// IR-GPU-NEXT: br i1 [[TMP42]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// IR-GPU: .omp.reduction.then: +// IR-GPU-NEXT: [[TMP43:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 +// IR-GPU-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP43]] +// IR-GPU-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// IR-GPU: omp.arraycpy.body: +// IR-GPU-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM1_ASCAST]], [[DOTOMP_REDUCTION_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-GPU-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST13:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-GPU-NEXT: [[TMP44:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], align 4 +// IR-GPU-NEXT: [[TMP45:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// IR-GPU-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] +// IR-GPU-NEXT: store i32 [[ADD14]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], align 4 +// IR-GPU-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], i32 1 +// IR-GPU-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// IR-GPU-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP43]] +// IR-GPU-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY]] +// IR-GPU: omp.arraycpy.done17: +// IR-GPU-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP38]]) +// IR-GPU-NEXT: br label [[DOTOMP_REDUCTION_DONE]] +// 
IR-GPU: .omp.reduction.done: +// IR-GPU-NEXT: ret void +// +// +// IR-GPU-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22_omp_outlined_omp_outlined +// IR-GPU-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] { +// IR-GPU-NEXT: entry: +// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// IR-GPU-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[J3:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[SUM4:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5) +// IR-GPU-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[J5:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr +// IR-GPU-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr 
addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// IR-GPU-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr +// IR-GPU-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr +// IR-GPU-NEXT: [[J_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_ADDR]] to ptr +// IR-GPU-NEXT: [[SUM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM_ADDR]] to ptr +// IR-GPU-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr +// IR-GPU-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr +// IR-GPU-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr +// IR-GPU-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr +// IR-GPU-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr +// IR-GPU-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr +// IR-GPU-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr +// IR-GPU-NEXT: [[J3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J3]] to ptr +// IR-GPU-NEXT: [[SUM4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM4]] to ptr +// IR-GPU-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr +// IR-GPU-NEXT: [[J5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J5]] to ptr +// IR-GPU-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr +// IR-GPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i64 [[J]], ptr 
[[J_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 99, ptr [[DOTOMP_UB_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 +// IR-GPU-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// IR-GPU-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 +// IR-GPU-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4_ASCAST]], i32 0, i32 0, i32 0 +// IR-GPU-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 +// IR-GPU-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] +// IR-GPU-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] +// IR-GPU: omp.arrayinit.body: +// IR-GPU-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// IR-GPU-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// IR-GPU-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// IR-GPU-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] +// IR-GPU-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] +// IR-GPU: omp.arrayinit.done: +// IR-GPU-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// IR-GPU-NEXT: call void @__kmpc_for_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1) +// IR-GPU-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-GPU: omp.inner.for.cond: +// IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] +// IR-GPU-NEXT: [[CONV6:%.*]] = sext i32 [[TMP7]] to i64 +// IR-GPU-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8, !llvm.access.group [[ACC_GRP7]] +// IR-GPU-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV6]], [[TMP8]] +// IR-GPU-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR-GPU: omp.inner.for.body: +// IR-GPU-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] +// IR-GPU-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP9]], 10 +// IR-GPU-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// IR-GPU-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// IR-GPU-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] +// IR-GPU-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] +// IR-GPU-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] +// IR-GPU-NEXT: [[DIV7:%.*]] = sdiv i32 [[TMP11]], 10 +// IR-GPU-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 10 +// IR-GPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], [[MUL8]] +// IR-GPU-NEXT: [[MUL9:%.*]] = mul nsw i32 [[SUB]], 1 +// IR-GPU-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] +// IR-GPU-NEXT: store i32 [[ADD10]], ptr [[J3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] +// 
IR-GPU-NEXT: [[TMP12:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] +// IR-GPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] +// IR-GPU-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// IR-GPU-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4_ASCAST]], i64 0, i64 [[IDXPROM]] +// IR-GPU-NEXT: [[TMP14:%.*]] = load i32, ptr [[J3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] +// IR-GPU-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP14]] to i64 +// IR-GPU-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] +// IR-GPU-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP7]] +// IR-GPU-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP15]], [[TMP12]] +// IR-GPU-NEXT: store i32 [[ADD13]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP7]] +// IR-GPU-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR-GPU: omp.body.continue: +// IR-GPU-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR-GPU: omp.inner.for.inc: +// IR-GPU-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] +// IR-GPU-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] +// IR-GPU-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// IR-GPU-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]] +// IR-GPU-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// IR-GPU: omp.inner.for.end: +// IR-GPU-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR-GPU: omp.loop.exit: +// IR-GPU-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// IR-GPU-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP19]]) +// IR-GPU-NEXT: [[TMP20:%.*]] = load ptr, 
ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// IR-GPU-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 +// IR-GPU-NEXT: store ptr [[SUM4_ASCAST]], ptr [[TMP22]], align 8 +// IR-GPU-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP21]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func) +// IR-GPU-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP23]], 1 +// IR-GPU-NEXT: br i1 [[TMP24]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// IR-GPU: .omp.reduction.then: +// IR-GPU-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 +// IR-GPU-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP25]] +// IR-GPU-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// IR-GPU: omp.arraycpy.body: +// IR-GPU-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM4_ASCAST]], [[DOTOMP_REDUCTION_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-GPU-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-GPU-NEXT: [[TMP26:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 +// IR-GPU-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// IR-GPU-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// IR-GPU-NEXT: store i32 [[ADD16]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 +// IR-GPU-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 +// IR-GPU-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// 
IR-GPU-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP25]] +// IR-GPU-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]] +// IR-GPU: omp.arraycpy.done19: +// IR-GPU-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP21]]) +// IR-GPU-NEXT: br label [[DOTOMP_REDUCTION_DONE]] +// IR-GPU: .omp.reduction.done: +// IR-GPU-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// IR-GPU-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// IR-GPU: .omp.lastprivate.then: +// IR-GPU-NEXT: store i32 10, ptr [[J3_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP30:%.*]] = load i32, ptr [[J3_ASCAST]], align 4 +// IR-GPU-NEXT: store i32 [[TMP30]], ptr [[J_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// IR-GPU: .omp.lastprivate.done: +// IR-GPU-NEXT: ret void +// +// +// IR-GPU-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func +// IR-GPU-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3:[0-9]+]] { +// IR-GPU-NEXT: entry: +// IR-GPU-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5) +// IR-GPU-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5) +// IR-GPU-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr +// IR-GPU-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr +// IR-GPU-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr 
addrspace(5) [[DOTADDR2]] to ptr +// IR-GPU-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr +// IR-GPU-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr +// IR-GPU-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr +// IR-GPU-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2 +// IR-GPU-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2 +// IR-GPU-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2 +// IR-GPU-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2 +// IR-GPU-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2 +// IR-GPU-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2 +// IR-GPU-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// IR-GPU-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// IR-GPU-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0 +// IR-GPU-NEXT: [[TMP11:%.*]] = getelementptr [10 x [10 x i32]], ptr [[TMP9]], i64 1 +// IR-GPU-NEXT: br label [[DOTSHUFFLE_PRE_COND:%.*]] +// IR-GPU: .shuffle.pre_cond: +// IR-GPU-NEXT: [[TMP12:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[TMP23:%.*]], [[DOTSHUFFLE_THEN:%.*]] ] +// IR-GPU-NEXT: [[TMP13:%.*]] = phi ptr [ [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], [[ENTRY]] ], [ [[TMP24:%.*]], [[DOTSHUFFLE_THEN]] ] +// IR-GPU-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP11]] to i64 +// IR-GPU-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[TMP12]] to i64 +// IR-GPU-NEXT: [[TMP16:%.*]] = sub i64 [[TMP14]], [[TMP15]] +// IR-GPU-NEXT: [[TMP17:%.*]] = sdiv exact i64 [[TMP16]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// 
IR-GPU-NEXT: [[TMP18:%.*]] = icmp sgt i64 [[TMP17]], 7 +// IR-GPU-NEXT: br i1 [[TMP18]], label [[DOTSHUFFLE_THEN]], label [[DOTSHUFFLE_EXIT:%.*]] +// IR-GPU: .shuffle.then: +// IR-GPU-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP12]], align 4 +// IR-GPU-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_get_warp_size() +// IR-GPU-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16 +// IR-GPU-NEXT: [[TMP22:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP19]], i16 [[TMP6]], i16 [[TMP21]]) +// IR-GPU-NEXT: store i64 [[TMP22]], ptr [[TMP13]], align 4 +// IR-GPU-NEXT: [[TMP23]] = getelementptr i64, ptr [[TMP12]], i64 1 +// IR-GPU-NEXT: [[TMP24]] = getelementptr i64, ptr [[TMP13]], i64 1 +// IR-GPU-NEXT: br label [[DOTSHUFFLE_PRE_COND]] +// IR-GPU: .shuffle.exit: +// IR-GPU-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 8 +// IR-GPU-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP7]], 0 +// IR-GPU-NEXT: [[TMP26:%.*]] = icmp eq i16 [[TMP7]], 1 +// IR-GPU-NEXT: [[TMP27:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] +// IR-GPU-NEXT: [[TMP28:%.*]] = and i1 [[TMP26]], [[TMP27]] +// IR-GPU-NEXT: [[TMP29:%.*]] = icmp eq i16 [[TMP7]], 2 +// IR-GPU-NEXT: [[TMP30:%.*]] = and i16 [[TMP5]], 1 +// IR-GPU-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP30]], 0 +// IR-GPU-NEXT: [[TMP32:%.*]] = and i1 [[TMP29]], [[TMP31]] +// IR-GPU-NEXT: [[TMP33:%.*]] = icmp sgt i16 [[TMP6]], 0 +// IR-GPU-NEXT: [[TMP34:%.*]] = and i1 [[TMP32]], [[TMP33]] +// IR-GPU-NEXT: [[TMP35:%.*]] = or i1 [[TMP25]], [[TMP28]] +// IR-GPU-NEXT: [[TMP36:%.*]] = or i1 [[TMP35]], [[TMP34]] +// IR-GPU-NEXT: br i1 [[TMP36]], label [[THEN:%.*]], label [[ELSE:%.*]] +// IR-GPU: then: +// IR-GPU-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22_omp_outlined_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR2]] +// IR-GPU-NEXT: br label [[IFCONT:%.*]] +// IR-GPU: else: +// IR-GPU-NEXT: br label [[IFCONT]] +// IR-GPU: ifcont: +// IR-GPU-NEXT: [[TMP37:%.*]] = 
icmp eq i16 [[TMP7]], 1 +// IR-GPU-NEXT: [[TMP38:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] +// IR-GPU-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] +// IR-GPU-NEXT: br i1 [[TMP39]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// IR-GPU: then4: +// IR-GPU-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0 +// IR-GPU-NEXT: [[TMP41:%.*]] = load ptr, ptr [[TMP40]], align 8 +// IR-GPU-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// IR-GPU-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8 +// IR-GPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP43]], ptr align 4 [[TMP41]], i64 400, i1 false) +// IR-GPU-NEXT: br label [[IFCONT6:%.*]] +// IR-GPU: else5: +// IR-GPU-NEXT: br label [[IFCONT6]] +// IR-GPU: ifcont6: +// IR-GPU-NEXT: ret void +// +// +// IR-GPU-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func +// IR-GPU-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] { +// IR-GPU-NEXT: entry: +// IR-GPU-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) +// IR-GPU-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr +// IR-GPU-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr +// IR-GPU-NEXT: [[DOTCNT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCNT_ADDR]] to ptr +// IR-GPU-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// IR-GPU-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// IR-GPU-NEXT: 
[[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 63 +// IR-GPU-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// IR-GPU-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 6 +// IR-GPU-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i32 0, ptr [[DOTCNT_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[PRECOND:%.*]] +// IR-GPU: precond: +// IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCNT_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 100 +// IR-GPU-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]] +// IR-GPU: body: +// IR-GPU-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4:[0-9]+]] to ptr), i32 [[TMP2]]) +// IR-GPU-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// IR-GPU-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] +// IR-GPU: then: +// IR-GPU-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 +// IR-GPU-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// IR-GPU-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 [[TMP7]] +// IR-GPU-NEXT: [[TMP12:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// IR-GPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4 +// IR-GPU-NEXT: store volatile i32 [[TMP13]], ptr addrspace(3) [[TMP12]], align 4 +// IR-GPU-NEXT: br label [[IFCONT:%.*]] +// IR-GPU: else: +// IR-GPU-NEXT: br label [[IFCONT]] +// IR-GPU: ifcont: +// IR-GPU-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4]] to ptr), i32 [[TMP2]]) +// IR-GPU-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 +// IR-GPU-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]] +// IR-GPU-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] +// IR-GPU: then2: +// IR-GPU-NEXT: [[TMP15:%.*]] = getelementptr 
inbounds [64 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// IR-GPU-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 +// IR-GPU-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// IR-GPU-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]] +// IR-GPU-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4 +// IR-GPU-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 +// IR-GPU-NEXT: br label [[IFCONT4:%.*]] +// IR-GPU: else3: +// IR-GPU-NEXT: br label [[IFCONT4]] +// IR-GPU: ifcont4: +// IR-GPU-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1 +// IR-GPU-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[PRECOND]] +// IR-GPU: exit: +// IR-GPU-NEXT: ret void +// +// +// IR-GPU-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func.1 +// IR-GPU-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3]] { +// IR-GPU-NEXT: entry: +// IR-GPU-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5) +// IR-GPU-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5) +// IR-GPU-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr +// IR-GPU-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr +// IR-GPU-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr +// IR-GPU-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr +// IR-GPU-NEXT: 
[[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr +// IR-GPU-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr +// IR-GPU-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2 +// IR-GPU-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2 +// IR-GPU-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2 +// IR-GPU-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2 +// IR-GPU-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2 +// IR-GPU-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2 +// IR-GPU-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// IR-GPU-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// IR-GPU-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0 +// IR-GPU-NEXT: [[TMP11:%.*]] = getelementptr [10 x [10 x i32]], ptr [[TMP9]], i64 1 +// IR-GPU-NEXT: br label [[DOTSHUFFLE_PRE_COND:%.*]] +// IR-GPU: .shuffle.pre_cond: +// IR-GPU-NEXT: [[TMP12:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[TMP23:%.*]], [[DOTSHUFFLE_THEN:%.*]] ] +// IR-GPU-NEXT: [[TMP13:%.*]] = phi ptr [ [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], [[ENTRY]] ], [ [[TMP24:%.*]], [[DOTSHUFFLE_THEN]] ] +// IR-GPU-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP11]] to i64 +// IR-GPU-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[TMP12]] to i64 +// IR-GPU-NEXT: [[TMP16:%.*]] = sub i64 [[TMP14]], [[TMP15]] +// IR-GPU-NEXT: [[TMP17:%.*]] = sdiv exact i64 [[TMP16]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// IR-GPU-NEXT: [[TMP18:%.*]] = icmp sgt i64 [[TMP17]], 7 +// IR-GPU-NEXT: br i1 [[TMP18]], label [[DOTSHUFFLE_THEN]], label [[DOTSHUFFLE_EXIT:%.*]] +// 
IR-GPU: .shuffle.then: +// IR-GPU-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP12]], align 4 +// IR-GPU-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_get_warp_size() +// IR-GPU-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16 +// IR-GPU-NEXT: [[TMP22:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP19]], i16 [[TMP6]], i16 [[TMP21]]) +// IR-GPU-NEXT: store i64 [[TMP22]], ptr [[TMP13]], align 4 +// IR-GPU-NEXT: [[TMP23]] = getelementptr i64, ptr [[TMP12]], i64 1 +// IR-GPU-NEXT: [[TMP24]] = getelementptr i64, ptr [[TMP13]], i64 1 +// IR-GPU-NEXT: br label [[DOTSHUFFLE_PRE_COND]] +// IR-GPU: .shuffle.exit: +// IR-GPU-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 8 +// IR-GPU-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP7]], 0 +// IR-GPU-NEXT: [[TMP26:%.*]] = icmp eq i16 [[TMP7]], 1 +// IR-GPU-NEXT: [[TMP27:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] +// IR-GPU-NEXT: [[TMP28:%.*]] = and i1 [[TMP26]], [[TMP27]] +// IR-GPU-NEXT: [[TMP29:%.*]] = icmp eq i16 [[TMP7]], 2 +// IR-GPU-NEXT: [[TMP30:%.*]] = and i16 [[TMP5]], 1 +// IR-GPU-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP30]], 0 +// IR-GPU-NEXT: [[TMP32:%.*]] = and i1 [[TMP29]], [[TMP31]] +// IR-GPU-NEXT: [[TMP33:%.*]] = icmp sgt i16 [[TMP6]], 0 +// IR-GPU-NEXT: [[TMP34:%.*]] = and i1 [[TMP32]], [[TMP33]] +// IR-GPU-NEXT: [[TMP35:%.*]] = or i1 [[TMP25]], [[TMP28]] +// IR-GPU-NEXT: [[TMP36:%.*]] = or i1 [[TMP35]], [[TMP34]] +// IR-GPU-NEXT: br i1 [[TMP36]], label [[THEN:%.*]], label [[ELSE:%.*]] +// IR-GPU: then: +// IR-GPU-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR2]] +// IR-GPU-NEXT: br label [[IFCONT:%.*]] +// IR-GPU: else: +// IR-GPU-NEXT: br label [[IFCONT]] +// IR-GPU: ifcont: +// IR-GPU-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP7]], 1 +// IR-GPU-NEXT: [[TMP38:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] +// IR-GPU-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] +// 
IR-GPU-NEXT: br i1 [[TMP39]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// IR-GPU: then4: +// IR-GPU-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0 +// IR-GPU-NEXT: [[TMP41:%.*]] = load ptr, ptr [[TMP40]], align 8 +// IR-GPU-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// IR-GPU-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8 +// IR-GPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP43]], ptr align 4 [[TMP41]], i64 400, i1 false) +// IR-GPU-NEXT: br label [[IFCONT6:%.*]] +// IR-GPU: else5: +// IR-GPU-NEXT: br label [[IFCONT6]] +// IR-GPU: ifcont6: +// IR-GPU-NEXT: ret void +// +// +// IR-GPU-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func.2 +// IR-GPU-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] { +// IR-GPU-NEXT: entry: +// IR-GPU-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) +// IR-GPU-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr +// IR-GPU-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr +// IR-GPU-NEXT: [[DOTCNT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCNT_ADDR]] to ptr +// IR-GPU-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// IR-GPU-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// IR-GPU-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 63 +// IR-GPU-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// IR-GPU-NEXT: 
[[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 6 +// IR-GPU-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i32 0, ptr [[DOTCNT_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[PRECOND:%.*]] +// IR-GPU: precond: +// IR-GPU-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCNT_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 100 +// IR-GPU-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]] +// IR-GPU: body: +// IR-GPU-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4]] to ptr), i32 [[TMP2]]) +// IR-GPU-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// IR-GPU-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] +// IR-GPU: then: +// IR-GPU-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 +// IR-GPU-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// IR-GPU-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 [[TMP7]] +// IR-GPU-NEXT: [[TMP12:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// IR-GPU-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4 +// IR-GPU-NEXT: store volatile i32 [[TMP13]], ptr addrspace(3) [[TMP12]], align 4 +// IR-GPU-NEXT: br label [[IFCONT:%.*]] +// IR-GPU: else: +// IR-GPU-NEXT: br label [[IFCONT]] +// IR-GPU: ifcont: +// IR-GPU-NEXT: call void @__kmpc_barrier(ptr addrspacecast (ptr addrspace(1) @[[GLOB4]] to ptr), i32 [[TMP2]]) +// IR-GPU-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 +// IR-GPU-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]] +// IR-GPU-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] +// IR-GPU: then2: +// IR-GPU-NEXT: [[TMP15:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// IR-GPU-NEXT: [[TMP16:%.*]] = getelementptr 
inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 +// IR-GPU-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// IR-GPU-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]] +// IR-GPU-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4 +// IR-GPU-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 +// IR-GPU-NEXT: br label [[IFCONT4:%.*]] +// IR-GPU: else3: +// IR-GPU-NEXT: br label [[IFCONT4]] +// IR-GPU: ifcont4: +// IR-GPU-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1 +// IR-GPU-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR_ASCAST]], align 4 +// IR-GPU-NEXT: br label [[PRECOND]] +// IR-GPU: exit: +// IR-GPU-NEXT: ret void +// +// +// IR-GPU-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func +// IR-GPU-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { +// IR-GPU-NEXT: entry: +// IR-GPU-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr +// IR-GPU-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr +// IR-GPU-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr +// IR-GPU-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 +// IR-GPU-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// IR-GPU-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// IR-GPU-NEXT: [[SUM:%.*]] 
= getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 +// IR-GPU-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP5]] +// IR-GPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP8]], ptr align 4 [[TMP7]], i64 400, i1 false) +// IR-GPU-NEXT: ret void +// +// +// IR-GPU-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func +// IR-GPU-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { +// IR-GPU-NEXT: entry: +// IR-GPU-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr +// IR-GPU-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr +// IR-GPU-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr +// IR-GPU-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr +// IR-GPU-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 +// IR-GPU-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 +// IR-GPU-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0 +// IR-GPU-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP4]] +// 
IR-GPU-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 +// IR-GPU-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr [[TMP7]]) #[[ATTR2]] +// IR-GPU-NEXT: ret void +// +// +// IR-GPU-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func +// IR-GPU-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { +// IR-GPU-NEXT: entry: +// IR-GPU-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr +// IR-GPU-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr +// IR-GPU-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr +// IR-GPU-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 +// IR-GPU-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// IR-GPU-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// IR-GPU-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 +// IR-GPU-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP5]] +// IR-GPU-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP7]], ptr align 128 [[TMP8]], i64 400, i1 false) +// 
IR-GPU-NEXT: ret void +// +// +// IR-GPU-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func +// IR-GPU-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { +// IR-GPU-NEXT: entry: +// IR-GPU-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) +// IR-GPU-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) +// IR-GPU-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr +// IR-GPU-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr +// IR-GPU-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr +// IR-GPU-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr +// IR-GPU-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 +// IR-GPU-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 +// IR-GPU-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8 +// IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 +// IR-GPU-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 +// IR-GPU-NEXT: [[SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0 +// IR-GPU-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x [10 x [10 x i32]]], ptr [[SUM]], i32 0, i32 [[TMP4]] +// IR-GPU-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 +// IR-GPU-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 +// IR-GPU-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP7]], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]]) #[[ATTR2]] +// 
IR-GPU-NEXT: ret void +// +// +// IR-LABEL: define {{[^@]+}}@_Z3foov +// IR-SAME: () #[[ATTR0:[0-9]+]] { +// IR-NEXT: entry: +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-NEXT: [[SUM:%.*]] = alloca [10 x [10 x i32]], align 16 +// IR-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8 +// IR-NEXT: [[TMP0:%.*]] = load i32, ptr [[J]], align 4 +// IR-NEXT: store i32 [[TMP0]], ptr [[J_CASTED]], align 4 +// IR-NEXT: [[TMP1:%.*]] = load i64, ptr [[J_CASTED]], align 8 +// IR-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22(i64 [[TMP1]], ptr [[SUM]]) #[[ATTR2:[0-9]+]] +// IR-NEXT: ret i32 0 +// +// +// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22 +// IR-SAME: (i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1:[0-9]+]] { +// IR-NEXT: entry: +// IR-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8 +// IR-NEXT: store i64 [[J]], ptr [[J_ADDR]], align 8 +// IR-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 +// IR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 +// IR-NEXT: [[TMP1:%.*]] = load i32, ptr [[J_ADDR]], align 4 +// IR-NEXT: store i32 [[TMP1]], ptr [[J_CASTED]], align 4 +// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[J_CASTED]], align 8 +// IR-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined, i64 [[TMP2]], ptr [[TMP0]]) +// IR-NEXT: ret void +// +// +// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined +// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] { +// IR-NEXT: entry: +// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[SUM1:%.*]] = alloca [10 x [10 x i32]], align 16 +// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J3:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J4:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8 +// IR-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 +// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-NEXT: store i64 [[J]], ptr [[J_ADDR]], align 8 +// IR-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 +// IR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 +// IR-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM1]], i32 0, i32 0, i32 0 +// IR-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 +// IR-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp 
eq ptr [[ARRAY_BEGIN]], [[TMP1]] +// IR-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] +// IR: omp.arrayinit.body: +// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// IR-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// IR-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP1]] +// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] +// IR: omp.arrayinit.done: +// IR-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// IR-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR: cond.true: +// IR-NEXT: br label [[COND_END:%.*]] +// IR: cond.false: +// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: br label [[COND_END]] +// IR: cond.end: +// IR-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// 
IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR: omp.inner.for.cond: +// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// IR-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR: omp.inner.for.body: +// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[J3]], align 4 +// IR-NEXT: store i32 [[TMP13]], ptr [[J_CASTED]], align 4 +// IR-NEXT: [[TMP14:%.*]] = load i64, ptr [[J_CASTED]], align 8 +// IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined, i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP14]], ptr [[SUM1]]) +// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR: omp.inner.for.inc: +// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// IR-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR: omp.inner.for.end: +// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR: omp.loop.exit: +// IR-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]]) +// IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// IR-NEXT: br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// IR: 
.omp.lastprivate.then: +// IR-NEXT: store i32 10, ptr [[J3]], align 4 +// IR-NEXT: [[TMP21:%.*]] = load i32, ptr [[J3]], align 4 +// IR-NEXT: store i32 [[TMP21]], ptr [[J_ADDR]], align 4 +// IR-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// IR: .omp.lastprivate.done: +// IR-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// IR-NEXT: store ptr [[SUM1]], ptr [[TMP22]], align 8 +// IR-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// IR-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3:[0-9]+]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// IR-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// IR-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// IR-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// IR-NEXT: ] +// IR: .omp.reduction.case1: +// IR-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 +// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP26]] +// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// IR: omp.arraycpy.body: +// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST6:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4 +// IR-NEXT: [[TMP28:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// IR-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// IR-NEXT: store i32 [[ADD7]], ptr 
[[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4 +// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], i32 1 +// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// IR-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP26]] +// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] +// IR: omp.arraycpy.done10: +// IR-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) +// IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// IR: .omp.reduction.case2: +// IR-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 +// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq ptr [[TMP0]], [[TMP29]] +// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY11]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY12:%.*]] +// IR: omp.arraycpy.body12: +// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY12]] ] +// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY12]] ] +// IR-NEXT: [[TMP30:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], align 4 +// IR-NEXT: [[TMP31:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 [[TMP30]] monotonic, align 4 +// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 +// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT16]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1 +// IR-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP29]] +// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY12]] +// IR: omp.arraycpy.done18: +// IR-NEXT: call void 
@__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) +// IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// IR: .omp.reduction.default: +// IR-NEXT: ret void +// +// +// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined +// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] { +// IR-NEXT: entry: +// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J3:%.*]] = alloca i32, align 4 +// IR-NEXT: [[SUM4:%.*]] = alloca [10 x [10 x i32]], align 16 +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J5:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 +// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// IR-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// IR-NEXT: store i64 [[J]], ptr [[J_ADDR]], align 8 +// IR-NEXT: 
store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 +// IR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 +// IR-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// IR-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// IR-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 +// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// IR-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// IR-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// IR-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i32 0, i32 0, i32 0 +// IR-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 +// IR-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] +// IR-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] +// IR: omp.arrayinit.body: +// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// IR-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// IR-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] +// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] +// IR: omp.arrayinit.done: +// IR-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, 
i32 1) +// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 +// IR-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR: cond.true: +// IR-NEXT: br label [[COND_END:%.*]] +// IR: cond.false: +// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: br label [[COND_END]] +// IR: cond.end: +// IR-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// IR-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR: omp.inner.for.cond: +// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] +// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// IR-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR: omp.inner.for.body: +// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 10 +// IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// IR-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: [[DIV7:%.*]] = sdiv i32 [[TMP13]], 10 +// IR-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 10 +// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], [[MUL8]] +// IR-NEXT: [[MUL9:%.*]] = mul nsw i32 [[SUB]], 1 +// IR-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] +// IR-NEXT: store i32 [[ADD10]], ptr [[J3]], align 4, 
!llvm.access.group [[ACC_GRP3]] +// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// IR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i64 0, i64 [[IDXPROM]] +// IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP16]] to i64 +// IR-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] +// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], [[TMP14]] +// IR-NEXT: store i32 [[ADD13]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR: omp.body.continue: +// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR: omp.inner.for.inc: +// IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP18]], 1 +// IR-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// IR: omp.inner.for.end: +// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR: omp.loop.exit: +// IR-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]]) +// IR-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// IR-NEXT: store ptr [[SUM4]], ptr [[TMP21]], align 8 +// IR-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// 
IR-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// IR-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// IR-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// IR-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// IR-NEXT: ] +// IR: .omp.reduction.case1: +// IR-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 +// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP25]] +// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// IR: omp.arraycpy.body: +// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-NEXT: [[TMP26:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 +// IR-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// IR-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// IR-NEXT: store i32 [[ADD16]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 +// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 +// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// IR-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP25]] +// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]] +// IR: omp.arraycpy.done19: +// IR-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP23]], ptr 
@.gomp_critical_user_.reduction.var) +// IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// IR: .omp.reduction.case2: +// IR-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 +// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY20:%.*]] = icmp eq ptr [[TMP0]], [[TMP28]] +// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY20]], label [[OMP_ARRAYCPY_DONE27:%.*]], label [[OMP_ARRAYCPY_BODY21:%.*]] +// IR: omp.arraycpy.body21: +// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST22:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY21]] ] +// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST23:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY21]] ] +// IR-NEXT: [[TMP29:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], align 4 +// IR-NEXT: [[TMP30:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 [[TMP29]] monotonic, align 4 +// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT24]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 1 +// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT25]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], i32 1 +// IR-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP28]] +// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY21]] +// IR: omp.arraycpy.done27: +// IR-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var) +// IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// IR: .omp.reduction.default: +// IR-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// IR-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// IR: .omp.lastprivate.then: +// IR-NEXT: store i32 10, ptr [[J3]], align 4 +// IR-NEXT: [[TMP33:%.*]] = load i32, ptr [[J3]], align 4 +// IR-NEXT: store i32 
[[TMP33]], ptr [[J_ADDR]], align 4 +// IR-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// IR: .omp.lastprivate.done: +// IR-NEXT: ret void +// +// +// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined.omp.reduction.reduction_func +// IR-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { +// IR-NEXT: entry: +// IR-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 +// IR-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 +// IR-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 +// IR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// IR-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// IR-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// IR-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 +// IR-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// IR-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100 +// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] +// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// IR: omp.arraycpy.body: +// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// IR-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr 
[[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// IR-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] +// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// IR: omp.arraycpy.done2: +// IR-NEXT: ret void +// +// +// IR-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp.reduction.reduction_func +// IR-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] { +// IR-NEXT: entry: +// IR-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 +// IR-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 +// IR-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 +// IR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// IR-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// IR-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// IR-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 +// IR-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// IR-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100 +// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] +// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// IR: omp.arraycpy.body: +// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// IR-NEXT: [[TMP10:%.*]] = load i32, ptr 
[[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// IR-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// IR-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] +// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// IR: omp.arraycpy.done2: +// IR-NEXT: ret void +// +// +// IR-PCH-LABEL: define {{[^@]+}}@_Z3foov +// IR-PCH-SAME: () #[[ATTR0:[0-9]+]] { +// IR-PCH-NEXT: entry: +// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[SUM:%.*]] = alloca [10 x [10 x i32]], align 16 +// IR-PCH-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[TMP0:%.*]] = load i32, ptr [[J]], align 4 +// IR-PCH-NEXT: store i32 [[TMP0]], ptr [[J_CASTED]], align 4 +// IR-PCH-NEXT: [[TMP1:%.*]] = load i64, ptr [[J_CASTED]], align 8 +// IR-PCH-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22(i64 [[TMP1]], ptr [[SUM]]) #[[ATTR2:[0-9]+]] +// IR-PCH-NEXT: ret i32 0 +// +// +// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22 +// IR-PCH-SAME: (i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1:[0-9]+]] { +// IR-PCH-NEXT: entry: +// IR-PCH-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: store i64 [[J]], ptr [[J_ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP1:%.*]] = load i32, ptr [[J_ADDR]], align 4 +// IR-PCH-NEXT: 
store i32 [[TMP1]], ptr [[J_CASTED]], align 4 +// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[J_CASTED]], align 8 +// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined, i64 [[TMP2]], ptr [[TMP0]]) +// IR-PCH-NEXT: ret void +// +// +// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined +// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] { +// IR-PCH-NEXT: entry: +// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[SUM1:%.*]] = alloca [10 x [10 x i32]], align 16 +// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[J3:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[J4:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 +// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[J]], ptr [[J_ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP0:%.*]] = load 
ptr, ptr [[SUM_ADDR]], align 8 +// IR-PCH-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM1]], i32 0, i32 0, i32 0 +// IR-PCH-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 +// IR-PCH-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP1]] +// IR-PCH-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] +// IR-PCH: omp.arrayinit.body: +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// IR-PCH-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP1]] +// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] +// IR-PCH: omp.arrayinit.done: +// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-PCH-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-PCH-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// IR-PCH-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR-PCH: cond.true: +// IR-PCH-NEXT: br label [[COND_END:%.*]] +// IR-PCH: cond.false: +// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 +// IR-PCH-NEXT: br label [[COND_END]] +// IR-PCH: cond.end: +// IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-PCH-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-PCH: omp.inner.for.cond: +// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// IR-PCH-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR-PCH: omp.inner.for.body: +// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-PCH-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[J3]], align 4 +// IR-PCH-NEXT: store i32 [[TMP13]], ptr [[J_CASTED]], align 4 +// IR-PCH-NEXT: [[TMP14:%.*]] = load i64, ptr [[J_CASTED]], align 8 +// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined, i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP14]], ptr [[SUM1]]) +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR-PCH: omp.inner.for.inc: +// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// IR-PCH-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR-PCH: omp.inner.for.end: +// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR-PCH: omp.loop.exit: +// IR-PCH-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]]) +// IR-PCH-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-PCH-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// IR-PCH-NEXT: br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// IR-PCH: .omp.lastprivate.then: +// IR-PCH-NEXT: store i32 10, ptr [[J3]], align 4 +// IR-PCH-NEXT: [[TMP21:%.*]] = load i32, ptr [[J3]], align 4 +// IR-PCH-NEXT: store i32 [[TMP21]], ptr [[J_ADDR]], align 4 +// IR-PCH-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// IR-PCH: .omp.lastprivate.done: +// IR-PCH-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// IR-PCH-NEXT: store ptr [[SUM1]], ptr [[TMP22]], align 8 +// IR-PCH-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// IR-PCH-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3:[0-9]+]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// IR-PCH-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// IR-PCH-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// IR-PCH-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// IR-PCH-NEXT: ] +// IR-PCH: .omp.reduction.case1: +// IR-PCH-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP26]] +// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// IR-PCH: omp.arraycpy.body: +// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST6:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-PCH-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4 +// IR-PCH-NEXT: [[TMP28:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// IR-PCH-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// IR-PCH-NEXT: store i32 [[ADD7]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], i32 1 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP26]] +// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] +// IR-PCH: omp.arraycpy.done10: +// IR-PCH-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) +// IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// 
IR-PCH: .omp.reduction.case2: +// IR-PCH-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq ptr [[TMP0]], [[TMP29]] +// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY11]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY12:%.*]] +// IR-PCH: omp.arraycpy.body12: +// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY12]] ] +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY12]] ] +// IR-PCH-NEXT: [[TMP30:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], align 4 +// IR-PCH-NEXT: [[TMP31:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 [[TMP30]] monotonic, align 4 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT16]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP29]] +// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY12]] +// IR-PCH: omp.arraycpy.done18: +// IR-PCH-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) +// IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// IR-PCH: .omp.reduction.default: +// IR-PCH-NEXT: ret void +// +// +// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined +// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] 
{ +// IR-PCH-NEXT: entry: +// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[J3:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[SUM4:%.*]] = alloca [10 x [10 x i32]], align 16 +// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[J5:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 +// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[J]], ptr [[J_ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 +// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// IR-PCH-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 +// IR-PCH-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// IR-PCH-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 +// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// IR-PCH-NEXT: [[CONV2:%.*]] = trunc i64 
[[TMP2]] to i32 +// IR-PCH-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// IR-PCH-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 +// IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-PCH-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i32 0, i32 0, i32 0 +// IR-PCH-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 +// IR-PCH-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] +// IR-PCH-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] +// IR-PCH: omp.arrayinit.body: +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// IR-PCH-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] +// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] +// IR-PCH: omp.arrayinit.done: +// IR-PCH-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-PCH-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 +// IR-PCH-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR-PCH: cond.true: +// IR-PCH-NEXT: br label [[COND_END:%.*]] +// IR-PCH: cond.false: +// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-PCH-NEXT: 
br label [[COND_END]] +// IR-PCH: cond.end: +// IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// IR-PCH-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-PCH: omp.inner.for.cond: +// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] +// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// IR-PCH-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR-PCH: omp.inner.for.body: +// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 10 +// IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// IR-PCH-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: [[DIV7:%.*]] = sdiv i32 [[TMP13]], 10 +// IR-PCH-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 10 +// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], [[MUL8]] +// IR-PCH-NEXT: [[MUL9:%.*]] = mul nsw i32 [[SUB]], 1 +// IR-PCH-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] +// IR-PCH-NEXT: store i32 [[ADD10]], ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: [[IDXPROM:%.*]] = sext i32 
[[TMP15]] to i64 +// IR-PCH-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i64 0, i64 [[IDXPROM]] +// IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP16]] to i64 +// IR-PCH-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] +// IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], [[TMP14]] +// IR-PCH-NEXT: store i32 [[ADD13]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR-PCH: omp.body.continue: +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR-PCH: omp.inner.for.inc: +// IR-PCH-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP18]], 1 +// IR-PCH-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// IR-PCH: omp.inner.for.end: +// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR-PCH: omp.loop.exit: +// IR-PCH-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]]) +// IR-PCH-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// IR-PCH-NEXT: store ptr [[SUM4]], ptr [[TMP21]], align 8 +// IR-PCH-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// IR-PCH-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// IR-PCH-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// IR-PCH-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// IR-PCH-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// IR-PCH-NEXT: ] +// IR-PCH: .omp.reduction.case1: +// IR-PCH-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP0]], [[TMP25]] +// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// IR-PCH: omp.arraycpy.body: +// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-PCH-NEXT: [[TMP26:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 +// IR-PCH-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// IR-PCH-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// IR-PCH-NEXT: store i32 [[ADD16]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP25]] +// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]] +// IR-PCH: omp.arraycpy.done19: +// IR-PCH-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var) +// IR-PCH-NEXT: br label 
[[DOTOMP_REDUCTION_DEFAULT]] +// IR-PCH: .omp.reduction.case2: +// IR-PCH-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP0]], i64 100 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY20:%.*]] = icmp eq ptr [[TMP0]], [[TMP28]] +// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY20]], label [[OMP_ARRAYCPY_DONE27:%.*]], label [[OMP_ARRAYCPY_BODY21:%.*]] +// IR-PCH: omp.arraycpy.body21: +// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST22:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY21]] ] +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST23:%.*]] = phi ptr [ [[TMP0]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY21]] ] +// IR-PCH-NEXT: [[TMP29:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], align 4 +// IR-PCH-NEXT: [[TMP30:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 [[TMP29]] monotonic, align 4 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT24]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 1 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT25]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], i32 1 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP28]] +// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY21]] +// IR-PCH: omp.arraycpy.done27: +// IR-PCH-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var) +// IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// IR-PCH: .omp.reduction.default: +// IR-PCH-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-PCH-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// IR-PCH-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// IR-PCH: .omp.lastprivate.then: +// IR-PCH-NEXT: store i32 10, ptr [[J3]], align 4 +// IR-PCH-NEXT: [[TMP33:%.*]] = load i32, ptr [[J3]], align 
4 +// IR-PCH-NEXT: store i32 [[TMP33]], ptr [[J_ADDR]], align 4 +// IR-PCH-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// IR-PCH: .omp.lastprivate.done: +// IR-PCH-NEXT: ret void +// +// +// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp_outlined.omp.reduction.reduction_func +// IR-PCH-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { +// IR-PCH-NEXT: entry: +// IR-PCH-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 +// IR-PCH-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 +// IR-PCH-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// IR-PCH-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// IR-PCH-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// IR-PCH-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 +// IR-PCH-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// IR-PCH-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] +// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// IR-PCH: omp.arraycpy.body: +// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// IR-PCH-NEXT: store i32 
[[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] +// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// IR-PCH: omp.arraycpy.done2: +// IR-PCH-NEXT: ret void +// +// +// IR-PCH-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22.omp_outlined.omp.reduction.reduction_func +// IR-PCH-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] { +// IR-PCH-NEXT: entry: +// IR-PCH-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 +// IR-PCH-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 +// IR-PCH-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// IR-PCH-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// IR-PCH-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// IR-PCH-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 +// IR-PCH-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// IR-PCH-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] +// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// IR-PCH: omp.arraycpy.body: +// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], 
[[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// IR-PCH-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] +// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// IR-PCH: omp.arraycpy.done2: +// IR-PCH-NEXT: ret void +// diff --git a/clang/test/OpenMP/target_teams_generic_loop_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_collapse_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/target_teams_generic_loop_collapse_codegen.cpp @@ -0,0 +1,1915 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// Test host codegen. 
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK1 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK3 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK3 + +// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: 
%clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +#ifdef CK1 + +template <typename T, int X, long long Y> +struct SS{ + T a[X][Y]; + + int foo(void) { + + #pragma omp target teams loop collapse(2) + for(int i = 0; i < X; i++) { + for(int j = 0; j < Y; j++) { + a[i][j] = (T)0; + } + } + + // discard loop variables not needed here + + + return a[0][0]; + } +}; + +int teams_template_struct(void) { + SS<int, 123, 456> V; + return V.foo(); + +} +#endif // CK1 + +// Test host codegen. +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK9 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK9 +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK11 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK11 + +// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu
-emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +#ifdef CK2 + +template +int tmain(T argc) { + T a[n][m]; + #pragma omp target teams loop collapse(2) + for(int i = 0; i < n; i++) { + for(int j = 0; j < m; j++) { + a[i][j] = (T)0; + } + } + return 0; +} + +int main (int argc, char **argv) { + int n = 100; + int m = 2; + int a[n][m]; + #pragma omp target teams loop collapse(2) + for(int i = 0; i < n; i++) { + for(int j = 0; j < m; j++) { + a[i][j] = 0; + } + } + return tmain(argc); +} + + + + + + + + +// discard loop variables not needed here + + +#endif // CK2 +#endif // #ifndef HEADER +// CHECK1-LABEL: define {{[^@]+}}@_Z21teams_template_structv +// CHECK1-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[V:%.*]] = alloca [[STRUCT_SS:%.*]], align 4 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_ZN2SSIiLi123ELx456EE3fooEv(ptr noundef nonnull align 4 dereferenceable(224352) [[V]]) +// CHECK1-NEXT: ret i32 [[CALL]] +// +// +// CHECK1-LABEL: define {{[^@]+}}@_ZN2SSIiLi123ELx456EE3fooEv +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(224352) [[THIS:%.*]]) #[[ATTR0]] comdat align 2 { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// 
CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 2, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 
3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 56088, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP15]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, ptr [[TMP17]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP19:%.*]] = 
icmp ne i32 [[TMP18]], 0 +// CHECK1-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1: omp_offload.failed: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK1: omp_offload.cont: +// CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A3]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 +// CHECK1-NEXT: ret i32 [[TMP20]] +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28 +// CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined, ptr [[TMP0]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 56087, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = 
alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] 
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 456 +// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 456 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[J]], align 4 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] +// CHECK1-NEXT: store i32 0, ptr 
[[ARRAYIDX9]], align 4 +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK1-SAME: () #[[ATTR3:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK1-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_Z21teams_template_structv +// CHECK3-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[V:%.*]] = alloca [[STRUCT_SS:%.*]], align 4 +// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_ZN2SSIiLi123ELx456EE3fooEv(ptr noundef nonnull align 4 dereferenceable(224352) [[V]]) +// CHECK3-NEXT: ret i32 [[CALL]] +// +// +// CHECK3-LABEL: define {{[^@]+}}@_ZN2SSIiLi123ELx456EE3fooEv +// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(224352) [[THIS:%.*]]) #[[ATTR0]] comdat align 2 { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: 
[[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 2, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 56088, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK3-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK3-NEXT: store i32 0, ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK3-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3: omp_offload.failed: +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR2:[0-9]+]] +// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK3: omp_offload.cont: +// CHECK3-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A3]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 +// CHECK3-NEXT: ret i32 [[TMP20]] +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28 +// CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined, ptr [[TMP0]]) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// 
CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 56087, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: 
[[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined.omp_outlined, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined.omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: 
[[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 +// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = 
phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 +// CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] +// CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[J]], align 4 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP13]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] +// CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4 +// CHECK3-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] +// CHECK3: omp.body.continue: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK3-SAME: () #[[ATTR3:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK3-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@main +// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[M:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[M_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x ptr], align 8 +// CHECK9-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8 +// CHECK9-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8 +// CHECK9-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8 +// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// 
CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// CHECK9-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 +// CHECK9-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 +// CHECK9-NEXT: store i32 100, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 2, ptr [[M]], align 4 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[M]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 +// CHECK9-NEXT: [[TMP4:%.*]] = call ptr @llvm.stacksave() +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[SAVED_STACK]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP3]] +// CHECK9-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP5]], align 4 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8 +// CHECK9-NEXT: store i64 [[TMP3]], ptr [[__VLA_EXPR1]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[N_CASTED]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[M_CASTED]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[M_CASTED]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP1]], [[TMP3]] +// CHECK9-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes, i64 40, i1 false) +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr 
[[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK9-NEXT: store ptr null, ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP18]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP19]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK9-NEXT: store ptr null, ptr [[TMP20]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP21]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP22]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK9-NEXT: store ptr null, ptr [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr 
[[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 +// CHECK9-NEXT: store ptr null, ptr [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP32]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP35]], 1 +// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 2, ptr [[TMP36]], align 
4 +// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 5, ptr [[TMP37]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP28]], ptr [[TMP38]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP29]], ptr [[TMP39]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP30]], ptr [[TMP40]], align 8 +// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP41]], align 8 +// CHECK9-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: store ptr null, ptr [[TMP42]], align 8 +// CHECK9-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK9-NEXT: store ptr null, ptr [[TMP43]], align 8 +// CHECK9-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 [[ADD]], ptr [[TMP44]], align 8 +// CHECK9-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK9-NEXT: store i64 0, ptr [[TMP45]], align 8 +// CHECK9-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK9-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP46]], align 4 +// CHECK9-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK9-NEXT: 
store [3 x i32] zeroinitializer, ptr [[TMP47]], align 4 +// CHECK9-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK9-NEXT: store i32 0, ptr [[TMP48]], align 4 +// CHECK9-NEXT: [[TMP49:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 +// CHECK9-NEXT: br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9: omp_offload.failed: +// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81(i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP1]], i64 [[TMP3]], ptr [[VLA]]) #[[ATTR3:[0-9]+]] +// CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK9: omp_offload.cont: +// CHECK9-NEXT: [[TMP51:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP51]]) +// CHECK9-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 +// CHECK9-NEXT: [[TMP52:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK9-NEXT: call void @llvm.stackrestore(ptr [[TMP52]]) +// CHECK9-NEXT: [[TMP53:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK9-NEXT: ret i32 [[TMP53]] +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81 +// CHECK9-SAME: (i64 noundef [[N:%.*]], i64 noundef [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: 
[[M_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[M_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[M_CASTED]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[M_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.omp_outlined, i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.omp_outlined +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 
8 +// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[M_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// 
CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] +// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK9-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK9: land.lhs.true: +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9: omp.precond.then: +// CHECK9-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: 
[[CMP13:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9: cond.true: +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: br label [[COND_END:%.*]] +// CHECK9: cond.false: +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: br label [[COND_END]] +// CHECK9: cond.end: +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK9: omp.inner.for.cond: +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9: omp.inner.for.body: +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[M_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP23]], ptr [[M_CASTED]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[M_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.omp_outlined.omp_outlined, i64 [[TMP19]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK9: omp.inner.for.inc: +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK9: omp.inner.for.end: +// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK9: omp.loop.exit: +// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP28]]) +// CHECK9-NEXT: br label [[OMP_PRECOND_END]] +// CHECK9: omp.precond.end: +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.omp_outlined.omp_outlined +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// 
CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr 
[[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] +// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK9-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK9: land.lhs.true: +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9: omp.precond.then: +// CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 
8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9: cond.true: +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: br label [[COND_END:%.*]] +// CHECK9: cond.false: +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: br label [[COND_END]] +// CHECK9: cond.end: +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK9: omp.inner.for.cond: +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] +// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9: omp.inner.for.body: +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP22]], 0 +// CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK9-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] +// CHECK9-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 +// CHECK9-NEXT: [[DIV19:%.*]] = sdiv 
i64 [[TMP21]], [[CONV18]] +// CHECK9-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] +// CHECK9-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK9-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK9-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 +// CHECK9-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] +// CHECK9-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 +// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP24]], [[CONV25]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 +// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] +// CHECK9-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 +// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] +// CHECK9-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP23]], [[MUL31]] +// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 +// CHECK9-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] +// CHECK9-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 +// CHECK9-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I11]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[TMP28:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP1]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP28]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[J12]], align 4 +// CHECK9-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK9-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM36]] +// CHECK9-NEXT: store i32 0, ptr 
[[ARRAYIDX37]], align 4 +// CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK9: omp.body.continue: +// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK9: omp.inner.for.inc: +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP30]], 1 +// CHECK9-NEXT: store i64 [[ADD38]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK9: omp.inner.for.end: +// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK9: omp.loop.exit: +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP32]]) +// CHECK9-NEXT: br label [[OMP_PRECOND_END]] +// CHECK9: omp.precond.end: +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_ +// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[A:%.*]] = alloca [10 x [2 x i32]], align 4 +// CHECK9-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK9-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK9-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 +// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK9-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[A]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[A]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = 
getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 2, ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr @.offload_sizes.1, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK9-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 20, ptr [[TMP13]], align 8 +// 
CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK9-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK9-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP15]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK9-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK9-NEXT: store i32 0, ptr [[TMP17]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK9-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9: omp_offload.failed: +// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68(ptr [[A]]) #[[ATTR3]] +// CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK9: omp_offload.cont: +// CHECK9-NEXT: ret i32 0 +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68 +// CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.omp_outlined, ptr [[TMP0]]) +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.omp_outlined +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 19, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9: cond.true: +// CHECK9-NEXT: br label [[COND_END:%.*]] +// CHECK9: cond.false: +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: br label [[COND_END]] +// CHECK9: cond.end: +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK9: omp.inner.for.cond: +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9: omp.inner.for.body: +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK9: omp.inner.for.inc: +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK9: omp.inner.for.end: +// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK9: omp.loop.exit: +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.omp_outlined.omp_outlined +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// 
CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 +// CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9: cond.true: +// CHECK9-NEXT: br label [[COND_END:%.*]] +// CHECK9: cond.false: +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: br label [[COND_END]] +// CHECK9: cond.end: +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], 
[[COND_FALSE]] ] +// CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK9: omp.inner.for.cond: +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9: omp.inner.for.body: +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 2 +// CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 2 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK9-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[J]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 +// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] +// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX9]], align 4 +// CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK9: 
omp.body.continue: +// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK9: omp.inner.for.inc: +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK9: omp.inner.for.end: +// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK9: omp.loop.exit: +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK9-SAME: () #[[ATTR6:[0-9]+]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK9-NEXT: ret void +// +// +// CHECK11-LABEL: define {{[^@]+}}@main +// CHECK11-SAME: (i32 noundef [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[M:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[M_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x ptr], align 4 +// CHECK11-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4 +// CHECK11-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4 +// CHECK11-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: 
[[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK11-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// CHECK11-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 4 +// CHECK11-NEXT: store i32 100, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 2, ptr [[M]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[M]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave() +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP0]], [[TMP1]] +// CHECK11-NEXT: [[VLA:%.*]] = alloca i32, i32 [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[__VLA_EXPR0]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[__VLA_EXPR1]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[M_CASTED]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[M_CASTED]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = mul nuw i32 [[TMP0]], [[TMP1]] +// CHECK11-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4 +// CHECK11-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes, i32 40, i1 false) +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// 
CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK11-NEXT: store ptr null, ptr [[TMP16]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP18]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr null, ptr [[TMP19]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP20]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP21]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr null, ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], 
i32 0, i32 4 +// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK11-NEXT: store i64 [[TMP10]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr null, ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP30]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP34]], 1 +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 2, ptr [[TMP35]], 
align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 5, ptr [[TMP36]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP27]], ptr [[TMP37]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP28]], ptr [[TMP38]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP29]], ptr [[TMP39]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP40]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: store ptr null, ptr [[TMP41]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK11-NEXT: store ptr null, ptr [[TMP42]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 [[ADD]], ptr [[TMP43]], align 8 +// CHECK11-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK11-NEXT: store i64 0, ptr [[TMP44]], align 8 +// CHECK11-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP45]], align 4 +// CHECK11-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 
0, i32 11 +// CHECK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP46]], align 4 +// CHECK11-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK11-NEXT: store i32 0, ptr [[TMP47]], align 4 +// CHECK11-NEXT: [[TMP48:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 +// CHECK11-NEXT: br i1 [[TMP49]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11: omp_offload.failed: +// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81(i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP0]], i32 [[TMP1]], ptr [[VLA]]) #[[ATTR3:[0-9]+]] +// CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK11: omp_offload.cont: +// CHECK11-NEXT: [[TMP50:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP50]]) +// CHECK11-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 +// CHECK11-NEXT: [[TMP51:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 +// CHECK11-NEXT: call void @llvm.stackrestore(ptr [[TMP51]]) +// CHECK11-NEXT: [[TMP52:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK11-NEXT: ret i32 [[TMP52]] +// +// +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81 +// CHECK11-SAME: (i32 noundef [[N:%.*]], i32 noundef [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N_CASTED:%.*]] = 
alloca i32, align 4 +// CHECK11-NEXT: [[M_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 +// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[M_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[M_CASTED]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[M_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.omp_outlined, i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK11-NEXT: ret void +// +// +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.omp_outlined +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[M_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 +// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] +// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK11-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK11: land.lhs.true: +// CHECK11-NEXT: [[TMP8:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11: omp.precond.then: +// CHECK11-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK11-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11: cond.true: +// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: br label [[COND_END:%.*]] +// CHECK11: cond.false: +// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: br label [[COND_END]] +// CHECK11: cond.end: +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK11: omp.inner.for.cond: +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr 
[[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11: omp.inner.for.body: +// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK11-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i32 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[N_CASTED]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[M_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[M_CASTED]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[M_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.omp_outlined.omp_outlined, i32 [[TMP20]], i32 [[TMP22]], i32 [[TMP24]], i32 [[TMP26]], i32 [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK11: omp.inner.for.inc: +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP27]], [[TMP28]] +// CHECK11-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK11: omp.inner.for.end: +// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK11: omp.loop.exit: +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP30]]) +// CHECK11-NEXT: br label [[OMP_PRECOND_END]] +// CHECK11: omp.precond.end: +// CHECK11-NEXT: ret void +// +// +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.omp_outlined.omp_outlined +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VLA_ADDR:%.*]] = 
alloca i32, align 4 +// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I13:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J14:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 +// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4 
+// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] +// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK11-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK11: land.lhs.true: +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11: omp.precond.then: +// CHECK11-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK11-NEXT: [[CONV11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: [[CONV12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK11-NEXT: store i64 [[CONV11]], ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[CONV12]], ptr [[DOTOMP_UB]], align 8 +// 
CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11: cond.true: +// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: br label [[COND_END:%.*]] +// CHECK11: cond.false: +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: br label [[COND_END]] +// CHECK11: cond.end: +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK11: omp.inner.for.cond: +// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11: omp.inner.for.body: +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK11-NEXT: [[SUB17:%.*]] = sub nsw i32 
[[TMP22]], 0 +// CHECK11-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 +// CHECK11-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] +// CHECK11-NEXT: [[CONV20:%.*]] = sext i32 [[MUL19]] to i64 +// CHECK11-NEXT: [[DIV21:%.*]] = sdiv i64 [[TMP21]], [[CONV20]] +// CHECK11-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]] +// CHECK11-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK11-NEXT: store i32 [[CONV23]], ptr [[I13]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK11-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 +// CHECK11-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] +// CHECK11-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 +// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP24]], [[CONV27]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK11-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK11-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 +// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] +// CHECK11-NEXT: [[CONV32:%.*]] = sext i32 [[MUL31]] to i64 +// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[DIV28]], [[CONV32]] +// CHECK11-NEXT: [[SUB34:%.*]] = sub nsw i64 [[TMP23]], [[MUL33]] +// CHECK11-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 +// CHECK11-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] +// CHECK11-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 +// CHECK11-NEXT: store i32 [[CONV37]], ptr [[J14]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I13]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = mul nsw i32 [[TMP27]], [[TMP1]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP28]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[J14]], align 4 +// 
CHECK11-NEXT: [[ARRAYIDX38:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP29]] +// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX38]], align 4 +// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK11: omp.body.continue: +// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK11: omp.inner.for.inc: +// CHECK11-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP30]], 1 +// CHECK11-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK11: omp.inner.for.end: +// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK11: omp.loop.exit: +// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP32]]) +// CHECK11-NEXT: br label [[OMP_PRECOND_END]] +// CHECK11: omp.precond.end: +// CHECK11-NEXT: ret void +// +// +// CHECK11-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_ +// CHECK11-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca [10 x [2 x i32]], align 4 +// CHECK11-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK11-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK11-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK11-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[A]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[A]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 2, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr @.offload_sizes.1, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK11-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK11-NEXT: 
[[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 20, ptr [[TMP13]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK11-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK11-NEXT: store i32 0, ptr [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK11-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11: omp_offload.failed: +// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68(ptr [[A]]) #[[ATTR3]] +// CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK11: omp_offload.cont: +// CHECK11-NEXT: ret i32 0 +// +// +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68 +// CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: call void (ptr, 
i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.omp_outlined, ptr [[TMP0]]) +// CHECK11-NEXT: ret void +// +// +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.omp_outlined +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 19, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 
92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11: cond.true: +// CHECK11-NEXT: br label [[COND_END:%.*]] +// CHECK11: cond.false: +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: br label [[COND_END]] +// CHECK11: cond.end: +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK11: omp.inner.for.cond: +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11: omp.inner.for.body: +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.omp_outlined.omp_outlined, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK11: omp.inner.for.inc: +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK11: omp.inner.for.end: +// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK11: omp.loop.exit: +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK11-NEXT: ret void +// +// +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.omp_outlined.omp_outlined +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I:%.*]] = alloca 
i32, align 4 +// CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 +// CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11: cond.true: +// CHECK11-NEXT: br label [[COND_END:%.*]] +// CHECK11: cond.false: +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: br label [[COND_END]] +// CHECK11: cond.end: +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// 
CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK11: omp.inner.for.cond: +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11: omp.inner.for.body: +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 +// CHECK11-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] +// CHECK11-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] +// CHECK11-NEXT: store i32 [[ADD6]], ptr [[J]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP13]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] +// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4 +// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK11: omp.body.continue: +// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK11: omp.inner.for.inc: +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// 
CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK11: omp.inner.for.end: +// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK11: omp.loop.exit: +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK11-NEXT: ret void +// +// +// CHECK11-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK11-SAME: () #[[ATTR6:[0-9]+]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK11-NEXT: ret void +// diff --git a/clang/test/OpenMP/target_teams_generic_loop_depend_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_depend_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/target_teams_generic_loop_depend_codegen.cpp @@ -0,0 +1,3219 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ +// Test host codegen. 
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK + +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// 
RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s + +// Test target codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device 
-fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK + +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + + + + + +// Check target registration is registered as a Ctor. 
+ + +template +struct TT{ + tx X; + ty Y; +}; + +int global; +extern int global; + +int foo(int n) { + int a = 0; + short aa = 0; + float b[10]; + float bn[n]; + double c[5][10]; + double cn[5][n]; + TT d; + static long *plocal; + + #pragma omp target teams loop device(global + a) depend(in: global) depend(out: a, b, cn[4]) + for (int i = 0; i < 10; ++i) { + } + + + + + + + + #pragma omp target teams loop device(global + a) depend(inout: global, a, bn) if(target:a) + for (int i = 0; i < *plocal; ++i) { + static int local1; + *plocal = global; + local1 = global; + } + + #pragma omp target teams loop if(0) firstprivate(global) depend(out:global) + for (int i = 0; i < global; ++i) { + global += 1; + } + + return a; +} + +// Check that the offloading functions are emitted and that the arguments are +// correct and loaded correctly for the target regions in foo(). + + + + + + + +// Create stack storage and store argument in there. + + + +#endif +// CHECK-64-LABEL: define {{[^@]+}}@_Z3fooi +// CHECK-64-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK-64-NEXT: [[B:%.*]] = alloca [10 x float], align 4 +// CHECK-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// CHECK-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 +// CHECK-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 +// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK-64-NEXT: [[DOTDEP_ARR_ADDR:%.*]] = alloca [4 x %struct.kmp_depend_info], align 8 +// CHECK-64-NEXT: [[DEP_COUNTER_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 
+// CHECK-64-NEXT: [[GLOBAL_CASTED:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[AGG_CAPTURED4:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK-64-NEXT: [[DOTDEP_ARR_ADDR5:%.*]] = alloca [3 x %struct.kmp_depend_info], align 8 +// CHECK-64-NEXT: [[DEP_COUNTER_ADDR6:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[AGG_CAPTURED7:%.*]] = alloca [[STRUCT_ANON_0]], align 8 +// CHECK-64-NEXT: [[DOTDEP_ARR_ADDR8:%.*]] = alloca [3 x %struct.kmp_depend_info], align 8 +// CHECK-64-NEXT: [[DEP_COUNTER_ADDR9:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[GLOBAL_CASTED10:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[AGG_CAPTURED12:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 +// CHECK-64-NEXT: [[DOTDEP_ARR_ADDR13:%.*]] = alloca [1 x %struct.kmp_depend_info], align 8 +// CHECK-64-NEXT: [[DEP_COUNTER_ADDR14:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK-64-NEXT: store i16 0, i16* [[AA]], align 2 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 +// CHECK-64-NEXT: [[TMP3:%.*]] = call i8* @llvm.stacksave() +// CHECK-64-NEXT: store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8 +// CHECK-64-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP2]], align 4 +// CHECK-64-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64 +// CHECK-64-NEXT: [[TMP6:%.*]] = mul nuw i64 5, [[TMP5]] +// CHECK-64-NEXT: [[VLA1:%.*]] = alloca double, i64 [[TMP6]], align 8 +// 
CHECK-64-NEXT: store i64 [[TMP5]], i64* [[__VLA_EXPR1]], align 8 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* @global, align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], [[TMP8]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-64-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 4 +// CHECK-64-NEXT: [[TMP11:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i64 40, i64 4, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK-64-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP11]] to %struct.kmp_task_t_with_privates* +// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP12]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP13]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i8*, i8** [[TMP14]], align 8 +// CHECK-64-NEXT: [[TMP16:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* +// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP15]], i8* align 4 [[TMP16]], i64 4, i1 false) +// CHECK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP18:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO:%.*]], %struct.kmp_depend_info* [[TMP17]], i64 0 +// CHECK-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP18]], i32 0, i32 0 +// CHECK-64-NEXT: store i64 ptrtoint (i32* @global to i64), i64* [[TMP19]], align 8 +// 
CHECK-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP18]], i32 0, i32 1 +// CHECK-64-NEXT: store i64 4, i64* [[TMP20]], align 8 +// CHECK-64-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP18]], i32 0, i32 2 +// CHECK-64-NEXT: store i8 1, i8* [[TMP21]], align 8 +// CHECK-64-NEXT: [[TMP22:%.*]] = ptrtoint i32* [[A]] to i64 +// CHECK-64-NEXT: [[TMP23:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP17]], i64 1 +// CHECK-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP23]], i32 0, i32 0 +// CHECK-64-NEXT: store i64 [[TMP22]], i64* [[TMP24]], align 8 +// CHECK-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP23]], i32 0, i32 1 +// CHECK-64-NEXT: store i64 4, i64* [[TMP25]], align 8 +// CHECK-64-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP23]], i32 0, i32 2 +// CHECK-64-NEXT: store i8 3, i8* [[TMP26]], align 8 +// CHECK-64-NEXT: [[TMP27:%.*]] = ptrtoint [10 x float]* [[B]] to i64 +// CHECK-64-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP17]], i64 2 +// CHECK-64-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP28]], i32 0, i32 0 +// CHECK-64-NEXT: store i64 [[TMP27]], i64* [[TMP29]], align 8 +// CHECK-64-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP28]], i32 0, i32 1 +// CHECK-64-NEXT: store i64 40, i64* [[TMP30]], align 8 +// CHECK-64-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP28]], i32 0, i32 2 +// CHECK-64-NEXT: store i8 3, i8* [[TMP31]], align 8 +// CHECK-64-NEXT: [[TMP32:%.*]] = mul nsw i64 4, [[TMP5]] +// CHECK-64-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[VLA1]], i64 [[TMP32]] +// CHECK-64-NEXT: [[TMP33:%.*]] = mul nuw i64 [[TMP5]], 8 +// CHECK-64-NEXT: [[TMP34:%.*]] = ptrtoint double* [[ARRAYIDX]] to i64 +// CHECK-64-NEXT: [[TMP35:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP17]], i64 3 +// CHECK-64-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP35]], i32 0, i32 0 +// CHECK-64-NEXT: store i64 [[TMP34]], i64* [[TMP36]], align 8 +// CHECK-64-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP35]], i32 0, i32 1 +// CHECK-64-NEXT: store i64 [[TMP33]], i64* [[TMP37]], align 8 +// CHECK-64-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP35]], i32 0, i32 2 +// CHECK-64-NEXT: store i8 3, i8* [[TMP38]], align 8 +// CHECK-64-NEXT: store i64 4, i64* [[DEP_COUNTER_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP39:%.*]] = bitcast %struct.kmp_depend_info* [[TMP17]] to i8* +// CHECK-64-NEXT: call void @__kmpc_omp_taskwait_deps_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 4, i8* [[TMP39]], i32 0, i8* null, i32 0) +// CHECK-64-NEXT: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i8* [[TMP11]]) +// CHECK-64-NEXT: [[TMP40:%.*]] = call i32 @.omp_task_entry.(i32 [[TMP0]], %struct.kmp_task_t_with_privates* [[TMP12]]) #[[ATTR3:[0-9]+]] +// CHECK-64-NEXT: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i8* [[TMP11]]) +// CHECK-64-NEXT: [[TMP41:%.*]] = load i32, i32* @global, align 4 +// CHECK-64-NEXT: [[TMP42:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK-64-NEXT: [[TMP43:%.*]] = load i64*, i64** @_ZZ3fooiE6plocal, align 8 +// CHECK-64-NEXT: [[TMP44:%.*]] = load i32, i32* @global, 
align 4 +// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[GLOBAL_CASTED]] to i32* +// CHECK-64-NEXT: store i32 [[TMP44]], i32* [[CONV]], align 4 +// CHECK-64-NEXT: [[TMP45:%.*]] = load i64, i64* [[GLOBAL_CASTED]], align 8 +// CHECK-64-NEXT: [[TMP46:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP46]], 0 +// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK-64: omp_if.then: +// CHECK-64-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP48:%.*]] = bitcast i8** [[TMP47]] to i64** +// CHECK-64-NEXT: store i64* [[TMP43]], i64** [[TMP48]], align 8 +// CHECK-64-NEXT: [[TMP49:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP50:%.*]] = bitcast i8** [[TMP49]] to i64** +// CHECK-64-NEXT: store i64* [[TMP43]], i64** [[TMP50]], align 8 +// CHECK-64-NEXT: [[TMP51:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK-64-NEXT: store i8* null, i8** [[TMP51]], align 8 +// CHECK-64-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP53:%.*]] = bitcast i8** [[TMP52]] to i64* +// CHECK-64-NEXT: store i64 [[TMP45]], i64* [[TMP53]], align 8 +// CHECK-64-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP55:%.*]] = bitcast i8** [[TMP54]] to i64* +// CHECK-64-NEXT: store i64 [[TMP45]], i64* [[TMP55]], align 8 +// CHECK-64-NEXT: [[TMP56:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK-64-NEXT: store i8* null, i8** [[TMP56]], align 8 +// CHECK-64-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x 
i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED4]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP60:%.*]] = load i64*, i64** @_ZZ3fooiE6plocal, align 8 +// CHECK-64-NEXT: store i64* [[TMP60]], i64** [[TMP59]], align 8 +// CHECK-64-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED4]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP62:%.*]] = load i32, i32* @global, align 4 +// CHECK-64-NEXT: store i32 [[TMP62]], i32* [[TMP61]], align 8 +// CHECK-64-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED4]], i32 0, i32 2 +// CHECK-64-NEXT: [[TMP64:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK-64-NEXT: store i32 [[TMP64]], i32* [[TMP63]], align 4 +// CHECK-64-NEXT: [[TMP65:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i64 104, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.1*)* @.omp_task_entry..6 to i32 (i32, i8*)*)) +// CHECK-64-NEXT: [[TMP66:%.*]] = bitcast i8* [[TMP65]] to %struct.kmp_task_t_with_privates.1* +// CHECK-64-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], %struct.kmp_task_t_with_privates.1* [[TMP66]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP67]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP69:%.*]] = load i8*, i8** [[TMP68]], align 8 +// CHECK-64-NEXT: [[TMP70:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED4]] to i8* +// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP69]], i8* align 8 [[TMP70]], i64 16, i1 false) +// CHECK-64-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1]], %struct.kmp_task_t_with_privates.1* [[TMP66]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP72:%.*]] = bitcast i8* [[TMP69]] to %struct.anon.0* +// CHECK-64-NEXT: 
[[TMP73:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP71]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP74:%.*]] = load i64*, i64** @_ZZ3fooiE6plocal, align 8 +// CHECK-64-NEXT: store i64* [[TMP74]], i64** [[TMP73]], align 8 +// CHECK-64-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP71]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP76:%.*]] = bitcast [2 x i8*]* [[TMP75]] to i8* +// CHECK-64-NEXT: [[TMP77:%.*]] = bitcast i8** [[TMP57]] to i8* +// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP76]], i8* align 8 [[TMP77]], i64 16, i1 false) +// CHECK-64-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP71]], i32 0, i32 2 +// CHECK-64-NEXT: [[TMP79:%.*]] = bitcast [2 x i8*]* [[TMP78]] to i8* +// CHECK-64-NEXT: [[TMP80:%.*]] = bitcast i8** [[TMP58]] to i8* +// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP79]], i8* align 8 [[TMP80]], i64 16, i1 false) +// CHECK-64-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP71]], i32 0, i32 3 +// CHECK-64-NEXT: [[TMP82:%.*]] = bitcast [2 x i64]* [[TMP81]] to i8* +// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP82]], i8* align 8 bitcast ([2 x i64]* @.offload_sizes to i8*), i64 16, i1 false) +// CHECK-64-NEXT: [[TMP83:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP71]], i32 0, i32 4 +// CHECK-64-NEXT: [[TMP84:%.*]] = load i32, i32* @global, align 4 +// CHECK-64-NEXT: store i32 [[TMP84]], i32* [[TMP83]], align 8 +// CHECK-64-NEXT: [[TMP85:%.*]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR5]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP86:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP85]], i64 0 +// CHECK-64-NEXT: [[TMP87:%.*]] = getelementptr 
inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP86]], i32 0, i32 0 +// CHECK-64-NEXT: store i64 ptrtoint (i32* @global to i64), i64* [[TMP87]], align 8 +// CHECK-64-NEXT: [[TMP88:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP86]], i32 0, i32 1 +// CHECK-64-NEXT: store i64 4, i64* [[TMP88]], align 8 +// CHECK-64-NEXT: [[TMP89:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP86]], i32 0, i32 2 +// CHECK-64-NEXT: store i8 3, i8* [[TMP89]], align 8 +// CHECK-64-NEXT: [[TMP90:%.*]] = ptrtoint i32* [[A]] to i64 +// CHECK-64-NEXT: [[TMP91:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP85]], i64 1 +// CHECK-64-NEXT: [[TMP92:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP91]], i32 0, i32 0 +// CHECK-64-NEXT: store i64 [[TMP90]], i64* [[TMP92]], align 8 +// CHECK-64-NEXT: [[TMP93:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP91]], i32 0, i32 1 +// CHECK-64-NEXT: store i64 4, i64* [[TMP93]], align 8 +// CHECK-64-NEXT: [[TMP94:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP91]], i32 0, i32 2 +// CHECK-64-NEXT: store i8 3, i8* [[TMP94]], align 8 +// CHECK-64-NEXT: [[TMP95:%.*]] = mul nuw i64 [[TMP2]], 4 +// CHECK-64-NEXT: [[TMP96:%.*]] = ptrtoint float* [[VLA]] to i64 +// CHECK-64-NEXT: [[TMP97:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP85]], i64 2 +// CHECK-64-NEXT: [[TMP98:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP97]], i32 0, i32 0 +// CHECK-64-NEXT: store i64 [[TMP96]], i64* [[TMP98]], align 8 +// CHECK-64-NEXT: [[TMP99:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP97]], i32 0, i32 1 +// CHECK-64-NEXT: store i64 [[TMP95]], i64* [[TMP99]], align 8 +// CHECK-64-NEXT: [[TMP100:%.*]] = 
getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP97]], i32 0, i32 2 +// CHECK-64-NEXT: store i8 3, i8* [[TMP100]], align 8 +// CHECK-64-NEXT: store i64 3, i64* [[DEP_COUNTER_ADDR6]], align 8 +// CHECK-64-NEXT: [[TMP101:%.*]] = bitcast %struct.kmp_depend_info* [[TMP85]] to i8* +// CHECK-64-NEXT: call void @__kmpc_omp_taskwait_deps_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 3, i8* [[TMP101]], i32 0, i8* null, i32 0) +// CHECK-64-NEXT: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i8* [[TMP65]]) +// CHECK-64-NEXT: [[TMP102:%.*]] = call i32 @.omp_task_entry..6(i32 [[TMP0]], %struct.kmp_task_t_with_privates.1* [[TMP66]]) #[[ATTR3]] +// CHECK-64-NEXT: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i8* [[TMP65]]) +// CHECK-64-NEXT: br label [[OMP_IF_END:%.*]] +// CHECK-64: omp_if.else: +// CHECK-64-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED7]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP104:%.*]] = load i64*, i64** @_ZZ3fooiE6plocal, align 8 +// CHECK-64-NEXT: store i64* [[TMP104]], i64** [[TMP103]], align 8 +// CHECK-64-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED7]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP106:%.*]] = load i32, i32* @global, align 4 +// CHECK-64-NEXT: store i32 [[TMP106]], i32* [[TMP105]], align 8 +// CHECK-64-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED7]], i32 0, i32 2 +// CHECK-64-NEXT: [[TMP108:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK-64-NEXT: store i32 [[TMP108]], i32* [[TMP107]], align 4 +// CHECK-64-NEXT: [[TMP109:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i64 56, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.2*)* @.omp_task_entry..9 to i32 (i32, i8*)*)) +// CHECK-64-NEXT: [[TMP110:%.*]] = bitcast 
i8* [[TMP109]] to %struct.kmp_task_t_with_privates.2* +// CHECK-64-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP110]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP111]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP113:%.*]] = load i8*, i8** [[TMP112]], align 8 +// CHECK-64-NEXT: [[TMP114:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED7]] to i8* +// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP113]], i8* align 8 [[TMP114]], i64 16, i1 false) +// CHECK-64-NEXT: [[TMP115:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], %struct.kmp_task_t_with_privates.2* [[TMP110]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP116:%.*]] = bitcast i8* [[TMP113]] to %struct.anon.0* +// CHECK-64-NEXT: [[TMP117:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP115]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP118:%.*]] = load i64*, i64** @_ZZ3fooiE6plocal, align 8 +// CHECK-64-NEXT: store i64* [[TMP118]], i64** [[TMP117]], align 8 +// CHECK-64-NEXT: [[TMP119:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP115]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP120:%.*]] = load i32, i32* @global, align 4 +// CHECK-64-NEXT: store i32 [[TMP120]], i32* [[TMP119]], align 8 +// CHECK-64-NEXT: [[TMP121:%.*]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR8]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP122:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP121]], i64 0 +// CHECK-64-NEXT: [[TMP123:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP122]], i32 0, i32 0 +// CHECK-64-NEXT: store i64 ptrtoint (i32* @global to i64), i64* [[TMP123]], align 8 +// CHECK-64-NEXT: [[TMP124:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP122]], i32 0, i32 1 +// CHECK-64-NEXT: store i64 4, i64* [[TMP124]], align 8 +// CHECK-64-NEXT: [[TMP125:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP122]], i32 0, i32 2 +// CHECK-64-NEXT: store i8 3, i8* [[TMP125]], align 8 +// CHECK-64-NEXT: [[TMP126:%.*]] = ptrtoint i32* [[A]] to i64 +// CHECK-64-NEXT: [[TMP127:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP121]], i64 1 +// CHECK-64-NEXT: [[TMP128:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP127]], i32 0, i32 0 +// CHECK-64-NEXT: store i64 [[TMP126]], i64* [[TMP128]], align 8 +// CHECK-64-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP127]], i32 0, i32 1 +// CHECK-64-NEXT: store i64 4, i64* [[TMP129]], align 8 +// CHECK-64-NEXT: [[TMP130:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP127]], i32 0, i32 2 +// CHECK-64-NEXT: store i8 3, i8* [[TMP130]], align 8 +// CHECK-64-NEXT: [[TMP131:%.*]] = mul nuw i64 [[TMP2]], 4 +// CHECK-64-NEXT: [[TMP132:%.*]] = ptrtoint float* [[VLA]] to i64 +// CHECK-64-NEXT: [[TMP133:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP121]], i64 2 +// CHECK-64-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP133]], i32 0, i32 0 +// CHECK-64-NEXT: store i64 [[TMP132]], i64* [[TMP134]], align 8 +// CHECK-64-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP133]], i32 0, i32 1 +// CHECK-64-NEXT: store i64 [[TMP131]], i64* [[TMP135]], align 8 +// CHECK-64-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP133]], i32 0, i32 2 +// CHECK-64-NEXT: store i8 3, i8* [[TMP136]], align 8 +// CHECK-64-NEXT: store i64 3, i64* 
[[DEP_COUNTER_ADDR9]], align 8 +// CHECK-64-NEXT: [[TMP137:%.*]] = bitcast %struct.kmp_depend_info* [[TMP121]] to i8* +// CHECK-64-NEXT: call void @__kmpc_omp_taskwait_deps_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 3, i8* [[TMP137]], i32 0, i8* null, i32 0) +// CHECK-64-NEXT: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i8* [[TMP109]]) +// CHECK-64-NEXT: [[TMP138:%.*]] = call i32 @.omp_task_entry..9(i32 [[TMP0]], %struct.kmp_task_t_with_privates.2* [[TMP110]]) #[[ATTR3]] +// CHECK-64-NEXT: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i8* [[TMP109]]) +// CHECK-64-NEXT: br label [[OMP_IF_END]] +// CHECK-64: omp_if.end: +// CHECK-64-NEXT: [[TMP139:%.*]] = load i32, i32* @global, align 4 +// CHECK-64-NEXT: [[CONV11:%.*]] = bitcast i64* [[GLOBAL_CASTED10]] to i32* +// CHECK-64-NEXT: store i32 [[TMP139]], i32* [[CONV11]], align 4 +// CHECK-64-NEXT: [[TMP140:%.*]] = load i64, i64* [[GLOBAL_CASTED10]], align 8 +// CHECK-64-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[AGG_CAPTURED12]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP142:%.*]] = load i32, i32* @global, align 4 +// CHECK-64-NEXT: store i32 [[TMP142]], i32* [[TMP141]], align 4 +// CHECK-64-NEXT: [[TMP143:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i64 48, i64 4, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.5*)* @.omp_task_entry..14 to i32 (i32, i8*)*)) +// CHECK-64-NEXT: [[TMP144:%.*]] = bitcast i8* [[TMP143]] to %struct.kmp_task_t_with_privates.5* +// CHECK-64-NEXT: [[TMP145:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], %struct.kmp_task_t_with_privates.5* [[TMP144]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP146:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP145]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP147:%.*]] = load i8*, i8** [[TMP146]], align 8 +// CHECK-64-NEXT: [[TMP148:%.*]] 
= bitcast %struct.anon.4* [[AGG_CAPTURED12]] to i8* +// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP147]], i8* align 4 [[TMP148]], i64 4, i1 false) +// CHECK-64-NEXT: [[TMP149:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5]], %struct.kmp_task_t_with_privates.5* [[TMP144]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP150:%.*]] = bitcast i8* [[TMP147]] to %struct.anon.4* +// CHECK-64-NEXT: [[TMP151:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_6:%.*]], %struct..kmp_privates.t.6* [[TMP149]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP152:%.*]] = load i32, i32* @global, align 4 +// CHECK-64-NEXT: store i32 [[TMP152]], i32* [[TMP151]], align 8 +// CHECK-64-NEXT: [[TMP153:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR13]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP154:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP153]], i64 0 +// CHECK-64-NEXT: [[TMP155:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP154]], i32 0, i32 0 +// CHECK-64-NEXT: store i64 ptrtoint (i32* @global to i64), i64* [[TMP155]], align 8 +// CHECK-64-NEXT: [[TMP156:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP154]], i32 0, i32 1 +// CHECK-64-NEXT: store i64 4, i64* [[TMP156]], align 8 +// CHECK-64-NEXT: [[TMP157:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP154]], i32 0, i32 2 +// CHECK-64-NEXT: store i8 3, i8* [[TMP157]], align 8 +// CHECK-64-NEXT: store i64 1, i64* [[DEP_COUNTER_ADDR14]], align 8 +// CHECK-64-NEXT: [[TMP158:%.*]] = bitcast %struct.kmp_depend_info* [[TMP153]] to i8* +// CHECK-64-NEXT: call void @__kmpc_omp_taskwait_deps_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i8* [[TMP158]], i32 0, i8* null, i32 0) +// CHECK-64-NEXT: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i8* [[TMP143]]) +// 
CHECK-64-NEXT: [[TMP159:%.*]] = call i32 @.omp_task_entry..14(i32 [[TMP0]], %struct.kmp_task_t_with_privates.5* [[TMP144]]) #[[ATTR3]] +// CHECK-64-NEXT: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i8* [[TMP143]]) +// CHECK-64-NEXT: [[TMP160:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-64-NEXT: [[TMP161:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK-64-NEXT: call void @llvm.stackrestore(i8* [[TMP161]]) +// CHECK-64-NEXT: ret i32 [[TMP160]] +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l66 +// CHECK-64-SAME: () #[[ATTR2:[0-9]+]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK-64-NEXT: ret void +// CHECK-64-LABEL: define {{[^@]+}}@.omp_outlined. +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: 
store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-64-NEXT: call void 
(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// CHECK-64-LABEL: define {{[^@]+}}@.omp_outlined..1 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR2]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* 
[[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP6]], i32* 
[[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: ret void +// CHECK-64-LABEL: define {{[^@]+}}@.omp_task_entry. 
+// CHECK-64-SAME: (i32 noundef signext [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias noundef [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-64-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 +// CHECK-64-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK-64-NEXT: [[KERNEL_ARGS_I:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8 +// CHECK-64-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// CHECK-64-NEXT: store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 +// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 +// CHECK-64-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK-64-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* +// CHECK-64-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) +// CHECK-64-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK-64-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK-64-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META14:![0-9]+]]) +// CHECK-64-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !16 +// CHECK-64-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !16 +// CHECK-64-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !16 +// CHECK-64-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !16 +// CHECK-64-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !16 +// CHECK-64-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !16 +// CHECK-64-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !16 +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP10]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 0 +// CHECK-64-NEXT: store i32 2, i32* [[TMP14]], align 4, !noalias !16 +// CHECK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 1 +// CHECK-64-NEXT: store i32 0, i32* [[TMP15]], align 4, !noalias !16 +// CHECK-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 2 +// CHECK-64-NEXT: store i8** null, i8*** [[TMP16]], align 8, !noalias !16 +// CHECK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], 
%struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 3 +// CHECK-64-NEXT: store i8** null, i8*** [[TMP17]], align 8, !noalias !16 +// CHECK-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 4 +// CHECK-64-NEXT: store i64* null, i64** [[TMP18]], align 8, !noalias !16 +// CHECK-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 5 +// CHECK-64-NEXT: store i64* null, i64** [[TMP19]], align 8, !noalias !16 +// CHECK-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 6 +// CHECK-64-NEXT: store i8** null, i8*** [[TMP20]], align 8, !noalias !16 +// CHECK-64-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 7 +// CHECK-64-NEXT: store i8** null, i8*** [[TMP21]], align 8, !noalias !16 +// CHECK-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 8 +// CHECK-64-NEXT: store i64 10, i64* [[TMP22]], align 8, !noalias !16 +// CHECK-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 9 +// CHECK-64-NEXT: store i64 0, i64* [[TMP23]], align 8, !noalias !16 +// CHECK-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 10 +// CHECK-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP24]], align 4, !noalias !16 +// CHECK-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 11 +// CHECK-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* 
[[TMP25]], align 4, !noalias !16 +// CHECK-64-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 12 +// CHECK-64-NEXT: store i32 0, i32* [[TMP26]], align 4, !noalias !16 +// CHECK-64-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 [[TMP13]], i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l66.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]]) +// CHECK-64-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK-64-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__2_EXIT:%.*]] +// CHECK-64: omp_offload.failed.i: +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l66() #[[ATTR3]] +// CHECK-64-NEXT: br label [[DOTOMP_OUTLINED__2_EXIT]] +// CHECK-64: .omp_outlined..2.exit: +// CHECK-64-NEXT: ret i32 0 +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l76 +// CHECK-64-SAME: (i64* noundef [[PLOCAL:%.*]], i64 noundef [[GLOBAL:%.*]]) #[[ATTR2]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[PLOCAL_ADDR:%.*]] = alloca i64*, align 8 +// CHECK-64-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[GLOBAL_CASTED:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: store i64* [[PLOCAL]], i64** [[PLOCAL_ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[GLOBAL]], i64* [[GLOBAL_ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[GLOBAL_ADDR]] to i32* +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64*, i64** [[PLOCAL_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[GLOBAL_CASTED]] to i32* +// CHECK-64-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[GLOBAL_CASTED]], align 8 +// CHECK-64-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64* [[TMP0]], i64 [[TMP2]]) +// CHECK-64-NEXT: ret void +// CHECK-64-LABEL: define {{[^@]+}}@.omp_outlined..3 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64* noundef [[PLOCAL:%.*]], i64 noundef [[GLOBAL:%.*]]) #[[ATTR2]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[PLOCAL_ADDR:%.*]] = alloca i64*, align 8 +// CHECK-64-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[GLOBAL_CASTED:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64* [[PLOCAL]], i64** [[PLOCAL_ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[GLOBAL]], i64* [[GLOBAL_ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[GLOBAL_ADDR]] to i32* +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64*, i64** [[PLOCAL_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 8 +// CHECK-64-NEXT: store i64 [[TMP1]], i64* 
[[DOTCAPTURE_EXPR_]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_]], align 8 +// CHECK-64-NEXT: [[SUB:%.*]] = sub nsw i64 [[TMP2]], 0 +// CHECK-64-NEXT: [[DIV:%.*]] = sdiv i64 [[SUB]], 1 +// CHECK-64-NEXT: [[CONV2:%.*]] = trunc i64 [[DIV]] to i32 +// CHECK-64-NEXT: [[SUB3:%.*]] = sub nsw i32 [[CONV2]], 1 +// CHECK-64-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_]], align 8 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp slt i64 0, [[TMP3]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK-64: omp.precond.then: +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-64-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK-64-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], 
align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK-64-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i64*, i64** [[PLOCAL_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK-64-NEXT: [[CONV7:%.*]] = bitcast i64* [[GLOBAL_CASTED]] to i32* +// CHECK-64-NEXT: store i32 [[TMP19]], i32* [[CONV7]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i64, i64* [[GLOBAL_CASTED]], align 8 +// CHECK-64-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64*, i64)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64* [[TMP18]], i64 [[TMP20]]) +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP24]]) +// CHECK-64-NEXT: br label [[OMP_PRECOND_END]] +// CHECK-64: omp.precond.end: +// CHECK-64-NEXT: ret void +// CHECK-64-LABEL: define {{[^@]+}}@.omp_outlined..4 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64* noundef [[PLOCAL:%.*]], i64 noundef [[GLOBAL:%.*]]) #[[ATTR2]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[PLOCAL_ADDR:%.*]] = alloca i64*, align 8 +// CHECK-64-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: 
[[DOTCAPTURE_EXPR_:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i64* [[PLOCAL]], i64** [[PLOCAL_ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[GLOBAL]], i64* [[GLOBAL_ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[GLOBAL_ADDR]] to i32* +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64*, i64** [[PLOCAL_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 8 +// CHECK-64-NEXT: store i64 [[TMP1]], i64* [[DOTCAPTURE_EXPR_]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_]], align 8 +// CHECK-64-NEXT: [[SUB:%.*]] = sub nsw i64 [[TMP2]], 0 +// CHECK-64-NEXT: [[DIV:%.*]] = sdiv i64 [[SUB]], 1 +// CHECK-64-NEXT: [[CONV2:%.*]] = trunc i64 [[DIV]] to i32 +// CHECK-64-NEXT: [[SUB3:%.*]] = sub nsw i32 [[CONV2]], 1 +// CHECK-64-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_]], align 8 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp slt i64 0, [[TMP3]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK-64: omp.precond.then: +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], 
align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK-64-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-64-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK-64-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK-64-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK-64-NEXT: [[CONV9:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i64*, i64** [[PLOCAL_ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[CONV9]], i64* [[TMP18]], align 8 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK-64-NEXT: store i32 [[TMP19]], i32* @_ZZ3fooiE6local1, align 4 +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK-64-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP22]]) +// CHECK-64-NEXT: br label [[OMP_PRECOND_END]] +// CHECK-64: omp.precond.end: +// CHECK-64-NEXT: ret void +// 
CHECK-64-LABEL: define {{[^@]+}}@.omp_task_privates_map. +// CHECK-64-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], i64*** noalias noundef [[TMP1:%.*]], i32** noalias noundef [[TMP2:%.*]], [2 x i8*]** noalias noundef [[TMP3:%.*]], [2 x i8*]** noalias noundef [[TMP4:%.*]], [2 x i64]** noalias noundef [[TMP5:%.*]]) #[[ATTR7:[0-9]+]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8 +// CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca i64***, align 8 +// CHECK-64-NEXT: [[DOTADDR2:%.*]] = alloca i32**, align 8 +// CHECK-64-NEXT: [[DOTADDR3:%.*]] = alloca [2 x i8*]**, align 8 +// CHECK-64-NEXT: [[DOTADDR4:%.*]] = alloca [2 x i8*]**, align 8 +// CHECK-64-NEXT: [[DOTADDR5:%.*]] = alloca [2 x i64]**, align 8 +// CHECK-64-NEXT: store %struct..kmp_privates.t* [[TMP0]], %struct..kmp_privates.t** [[DOTADDR]], align 8 +// CHECK-64-NEXT: store i64*** [[TMP1]], i64**** [[DOTADDR1]], align 8 +// CHECK-64-NEXT: store i32** [[TMP2]], i32*** [[DOTADDR2]], align 8 +// CHECK-64-NEXT: store [2 x i8*]** [[TMP3]], [2 x i8*]*** [[DOTADDR3]], align 8 +// CHECK-64-NEXT: store [2 x i8*]** [[TMP4]], [2 x i8*]*** [[DOTADDR4]], align 8 +// CHECK-64-NEXT: store [2 x i64]** [[TMP5]], [2 x i64]*** [[DOTADDR5]], align 8 +// CHECK-64-NEXT: [[TMP6:%.*]] = load %struct..kmp_privates.t*, %struct..kmp_privates.t** [[DOTADDR]], align 8 +// CHECK-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i64***, i64**** [[DOTADDR1]], align 8 +// CHECK-64-NEXT: store i64** [[TMP7]], i64*** [[TMP8]], align 8 +// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP10:%.*]] = load [2 x i8*]**, [2 x i8*]*** [[DOTADDR3]], align 8 +// CHECK-64-NEXT: store [2 x i8*]* [[TMP9]], [2 x i8*]** [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP11:%.*]] = 
getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 2 +// CHECK-64-NEXT: [[TMP12:%.*]] = load [2 x i8*]**, [2 x i8*]*** [[DOTADDR4]], align 8 +// CHECK-64-NEXT: store [2 x i8*]* [[TMP11]], [2 x i8*]** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 3 +// CHECK-64-NEXT: [[TMP14:%.*]] = load [2 x i64]**, [2 x i64]*** [[DOTADDR5]], align 8 +// CHECK-64-NEXT: store [2 x i64]* [[TMP13]], [2 x i64]** [[TMP14]], align 8 +// CHECK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32**, i32*** [[DOTADDR2]], align 8 +// CHECK-64-NEXT: store i32* [[TMP15]], i32** [[TMP16]], align 8 +// CHECK-64-NEXT: ret void +// CHECK-64-LABEL: define {{[^@]+}}@.omp_task_entry..6 +// CHECK-64-SAME: (i32 noundef signext [[TMP0:%.*]], %struct.kmp_task_t_with_privates.1* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-64-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 +// CHECK-64-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK-64-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i64**, align 8 +// CHECK-64-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca [2 x i8*]*, align 8 +// CHECK-64-NEXT: [[DOTFIRSTPRIV_PTR_ADDR3_I:%.*]] = alloca [2 x i8*]*, align 8 +// CHECK-64-NEXT: [[DOTFIRSTPRIV_PTR_ADDR4_I:%.*]] = alloca [2 x i64]*, align 8 +// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__I:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: 
[[DOTCAPTURE_EXPR_5_I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[GLOBAL_CASTED_I:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[KERNEL_ARGS_I:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.1*, align 8 +// CHECK-64-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// CHECK-64-NEXT: store %struct.kmp_task_t_with_privates.1* [[TMP1]], %struct.kmp_task_t_with_privates.1** [[DOTADDR1]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.1*, %struct.kmp_task_t_with_privates.1** [[DOTADDR1]], align 8 +// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], %struct.kmp_task_t_with_privates.1* [[TMP3]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 +// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 +// CHECK-64-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* +// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1]], %struct.kmp_task_t_with_privates.1* [[TMP3]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* +// CHECK-64-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.1* [[TMP3]] to i8* +// CHECK-64-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) +// CHECK-64-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK-64-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) +// CHECK-64-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META24:![0-9]+]]) +// CHECK-64-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !26 +// CHECK-64-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !26 +// CHECK-64-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !26 +// CHECK-64-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i64***, i32**, [2 x i8*]**, [2 x i8*]**, [2 x i64]**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !26 +// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !26 +// CHECK-64-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !26 +// CHECK-64-NEXT: [[TMP12:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !26 +// CHECK-64-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !26 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !26 +// CHECK-64-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i64***, i32**, [2 x i8*]**, [2 x i8*]**, [2 x i64]**)* +// CHECK-64-NEXT: call void [[TMP15]](i8* [[TMP14]], i64*** [[DOTFIRSTPRIV_PTR_ADDR_I]], i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [2 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [2 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], [2 x i64]** [[DOTFIRSTPRIV_PTR_ADDR4_I]]) #[[ATTR3]] +// CHECK-64-NEXT: [[TMP16:%.*]] = load i64**, i64*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !26 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !26 +// CHECK-64-NEXT: [[TMP18:%.*]] = load [2 x i8*]*, [2 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !26 +// CHECK-64-NEXT: [[TMP19:%.*]] = load [2 x i8*]*, [2 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !26 +// CHECK-64-NEXT: 
[[TMP20:%.*]] = load [2 x i64]*, [2 x i64]** [[DOTFIRSTPRIV_PTR_ADDR4_I]], align 8, !noalias !26 +// CHECK-64-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP18]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP19]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[TMP20]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP12]], i32 0, i32 2 +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK-64-NEXT: [[TMP26:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK-64-NEXT: [[TMP27:%.*]] = load i64*, i64** [[TMP16]], align 8 +// CHECK-64-NEXT: [[TMP28:%.*]] = load i64, i64* [[TMP27]], align 8 +// CHECK-64-NEXT: store i64 [[TMP28]], i64* [[DOTCAPTURE_EXPR__I]], align 8, !noalias !26 +// CHECK-64-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__I]], align 8, !noalias !26 +// CHECK-64-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP29]] to i32 +// CHECK-64-NEXT: [[SUB6_I:%.*]] = sub nsw i32 [[CONV_I]], 1 +// CHECK-64-NEXT: store i32 [[SUB6_I]], i32* [[DOTCAPTURE_EXPR_5_I]], align 4, !noalias !26 +// CHECK-64-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5_I]], align 4, !noalias !26 +// CHECK-64-NEXT: [[ADD_I:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK-64-NEXT: [[TMP31:%.*]] = zext i32 [[ADD_I]] to i64 +// CHECK-64-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 0 +// CHECK-64-NEXT: store i32 2, i32* [[TMP32]], align 4, !noalias !26 +// CHECK-64-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 1 +// CHECK-64-NEXT: store i32 2, i32* [[TMP33]], align 4, !noalias !26 +// CHECK-64-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], 
%struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 2 +// CHECK-64-NEXT: store i8** [[TMP21]], i8*** [[TMP34]], align 8, !noalias !26 +// CHECK-64-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 3 +// CHECK-64-NEXT: store i8** [[TMP22]], i8*** [[TMP35]], align 8, !noalias !26 +// CHECK-64-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 4 +// CHECK-64-NEXT: store i64* [[TMP23]], i64** [[TMP36]], align 8, !noalias !26 +// CHECK-64-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 5 +// CHECK-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP37]], align 8, !noalias !26 +// CHECK-64-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 6 +// CHECK-64-NEXT: store i8** null, i8*** [[TMP38]], align 8, !noalias !26 +// CHECK-64-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 7 +// CHECK-64-NEXT: store i8** null, i8*** [[TMP39]], align 8, !noalias !26 +// CHECK-64-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 8 +// CHECK-64-NEXT: store i64 [[TMP31]], i64* [[TMP40]], align 8, !noalias !26 +// CHECK-64-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 9 +// CHECK-64-NEXT: store i64 0, i64* [[TMP41]], align 8, !noalias !26 +// CHECK-64-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 
0, i32 10 +// CHECK-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP42]], align 4, !noalias !26 +// CHECK-64-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 11 +// CHECK-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP43]], align 4, !noalias !26 +// CHECK-64-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 12 +// CHECK-64-NEXT: store i32 0, i32* [[TMP44]], align 4, !noalias !26 +// CHECK-64-NEXT: [[TMP45:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 [[TMP26]], i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l76.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]]) +// CHECK-64-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK-64-NEXT: br i1 [[TMP46]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__5_EXIT:%.*]] +// CHECK-64: omp_offload.failed.i: +// CHECK-64-NEXT: [[TMP47:%.*]] = load i64*, i64** [[TMP16]], align 8 +// CHECK-64-NEXT: [[TMP48:%.*]] = load i32, i32* @global, align 4, !noalias !26 +// CHECK-64-NEXT: [[CONV7_I:%.*]] = bitcast i64* [[GLOBAL_CASTED_I]] to i32* +// CHECK-64-NEXT: store i32 [[TMP48]], i32* [[CONV7_I]], align 4, !noalias !26 +// CHECK-64-NEXT: [[TMP49:%.*]] = load i64, i64* [[GLOBAL_CASTED_I]], align 8, !noalias !26 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l76(i64* [[TMP47]], i64 [[TMP49]]) #[[ATTR3]] +// CHECK-64-NEXT: br label [[DOTOMP_OUTLINED__5_EXIT]] +// CHECK-64: .omp_outlined..5.exit: +// CHECK-64-NEXT: ret i32 0 +// CHECK-64-LABEL: define {{[^@]+}}@.omp_task_privates_map..8 +// CHECK-64-SAME: (%struct..kmp_privates.t.3* noalias noundef [[TMP0:%.*]], i64*** noalias noundef [[TMP1:%.*]], i32** noalias noundef [[TMP2:%.*]]) #[[ATTR7]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca 
%struct..kmp_privates.t.3*, align 8 +// CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca i64***, align 8 +// CHECK-64-NEXT: [[DOTADDR2:%.*]] = alloca i32**, align 8 +// CHECK-64-NEXT: store %struct..kmp_privates.t.3* [[TMP0]], %struct..kmp_privates.t.3** [[DOTADDR]], align 8 +// CHECK-64-NEXT: store i64*** [[TMP1]], i64**** [[DOTADDR1]], align 8 +// CHECK-64-NEXT: store i32** [[TMP2]], i32*** [[DOTADDR2]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load %struct..kmp_privates.t.3*, %struct..kmp_privates.t.3** [[DOTADDR]], align 8 +// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP3]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64***, i64**** [[DOTADDR1]], align 8 +// CHECK-64-NEXT: store i64** [[TMP4]], i64*** [[TMP5]], align 8 +// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP3]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32**, i32*** [[DOTADDR2]], align 8 +// CHECK-64-NEXT: store i32* [[TMP6]], i32** [[TMP7]], align 8 +// CHECK-64-NEXT: ret void +// CHECK-64-LABEL: define {{[^@]+}}@.omp_task_entry..9 +// CHECK-64-SAME: (i32 noundef signext [[TMP0:%.*]], %struct.kmp_task_t_with_privates.2* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-64-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 +// CHECK-64-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK-64-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i64**, align 8 +// CHECK-64-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[GLOBAL_CASTED_I:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: 
[[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8 +// CHECK-64-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// CHECK-64-NEXT: store %struct.kmp_task_t_with_privates.2* [[TMP1]], %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 +// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 +// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 +// CHECK-64-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* +// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.3* [[TMP9]] to i8* +// CHECK-64-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.2* [[TMP3]] to i8* +// CHECK-64-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META27:![0-9]+]]) +// CHECK-64-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META30:![0-9]+]]) +// CHECK-64-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META32:![0-9]+]]) +// CHECK-64-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META34:![0-9]+]]) +// CHECK-64-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !36 +// CHECK-64-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !36 +// 
CHECK-64-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !36 +// CHECK-64-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.3*, i64***, i32**)* @.omp_task_privates_map..8 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !36 +// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !36 +// CHECK-64-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !36 +// CHECK-64-NEXT: [[TMP12:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !36 +// CHECK-64-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !36 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !36 +// CHECK-64-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i64***, i32**)* +// CHECK-64-NEXT: call void [[TMP15]](i8* [[TMP14]], i64*** [[DOTFIRSTPRIV_PTR_ADDR_I]], i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]]) #[[ATTR3]] +// CHECK-64-NEXT: [[TMP16:%.*]] = load i64**, i64*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !36 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !36 +// CHECK-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP12]], i32 0, i32 2 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i64*, i64** [[TMP16]], align 8 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* @global, align 4, !noalias !36 +// CHECK-64-NEXT: [[CONV_I:%.*]] = bitcast i64* [[GLOBAL_CASTED_I]] to i32* +// CHECK-64-NEXT: store i32 [[TMP20]], i32* [[CONV_I]], align 4, !noalias !36 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i64, i64* [[GLOBAL_CASTED_I]], align 8, !noalias !36 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l76(i64* [[TMP19]], i64 [[TMP21]]) #[[ATTR3]] +// CHECK-64-NEXT: ret i32 0 +// CHECK-64-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l83 +// CHECK-64-SAME: (i64 noundef [[GLOBAL:%.*]]) #[[ATTR2]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[GLOBAL_CASTED:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: store i64 [[GLOBAL]], i64* [[GLOBAL_ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[GLOBAL_ADDR]] to i32* +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[GLOBAL_CASTED]] to i32* +// CHECK-64-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[GLOBAL_CASTED]], align 8 +// CHECK-64-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i64 [[TMP1]]) +// CHECK-64-NEXT: ret void +// CHECK-64-LABEL: define {{[^@]+}}@.omp_outlined..10 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[GLOBAL:%.*]]) #[[ATTR2]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I3:%.*]] = 
alloca i32, align 4 +// CHECK-64-NEXT: [[GLOBAL_CASTED:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[GLOBAL]], i64* [[GLOBAL_ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[GLOBAL_ADDR]] to i32* +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK-64-NEXT: store i32 [[TMP0]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK-64-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK-64-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK-64-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK-64: omp.precond.then: +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP6:%.*]] = 
load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK-64-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK-64-NEXT: [[CONV6:%.*]] = bitcast i64* [[GLOBAL_CASTED]] to i32* +// CHECK-64-NEXT: store i32 [[TMP17]], i32* [[CONV6]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i64, i64* [[GLOBAL_CASTED]], align 8 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK-64-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP20]]) +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK-64-NEXT: call void @.omp_outlined..11(i32* [[TMP21]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]]) #[[ATTR3]] +// CHECK-64-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP20]]) +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]]) +// CHECK-64-NEXT: br label [[OMP_PRECOND_END]] +// CHECK-64: omp.precond.end: +// CHECK-64-NEXT: ret void +// CHECK-64-LABEL: define {{[^@]+}}@.omp_outlined..11 +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[GLOBAL:%.*]]) #[[ATTR2]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// 
CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[GLOBAL]], i64* [[GLOBAL_ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[GLOBAL_ADDR]] to i32* +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK-64-NEXT: store i32 [[TMP0]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK-64-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK-64-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK-64-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]] +// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK-64: omp.precond.then: +// 
CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-64-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-64-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: 
store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK-64-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK-64-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK-64-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 4 +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK-64-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP19]]) +// CHECK-64-NEXT: br label [[OMP_PRECOND_END]] +// CHECK-64: omp.precond.end: +// CHECK-64-NEXT: ret void +// CHECK-64-LABEL: define {{[^@]+}}@.omp_task_privates_map..13 +// CHECK-64-SAME: (%struct..kmp_privates.t.6* noalias noundef [[TMP0:%.*]], i32** noalias noundef [[TMP1:%.*]]) 
#[[ATTR7]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.6*, align 8 +// CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca i32**, align 8 +// CHECK-64-NEXT: store %struct..kmp_privates.t.6* [[TMP0]], %struct..kmp_privates.t.6** [[DOTADDR]], align 8 +// CHECK-64-NEXT: store i32** [[TMP1]], i32*** [[DOTADDR1]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load %struct..kmp_privates.t.6*, %struct..kmp_privates.t.6** [[DOTADDR]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_6:%.*]], %struct..kmp_privates.t.6* [[TMP2]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[DOTADDR1]], align 8 +// CHECK-64-NEXT: store i32* [[TMP3]], i32** [[TMP4]], align 8 +// CHECK-64-NEXT: ret void +// CHECK-64-LABEL: define {{[^@]+}}@.omp_task_entry..14 +// CHECK-64-SAME: (i32 noundef signext [[TMP0:%.*]], %struct.kmp_task_t_with_privates.5* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-64-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 +// CHECK-64-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK-64-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[GLOBAL_CASTED_I:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.5*, align 8 +// CHECK-64-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// CHECK-64-NEXT: store %struct.kmp_task_t_with_privates.5* [[TMP1]], %struct.kmp_task_t_with_privates.5** [[DOTADDR1]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 +// 
CHECK-64-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.5*, %struct.kmp_task_t_with_privates.5** [[DOTADDR1]], align 8 +// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], %struct.kmp_task_t_with_privates.5* [[TMP3]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 +// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 +// CHECK-64-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.4* +// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5]], %struct.kmp_task_t_with_privates.5* [[TMP3]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.6* [[TMP9]] to i8* +// CHECK-64-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.5* [[TMP3]] to i8* +// CHECK-64-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META37:![0-9]+]]) +// CHECK-64-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META40:![0-9]+]]) +// CHECK-64-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META42:![0-9]+]]) +// CHECK-64-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META44:![0-9]+]]) +// CHECK-64-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !46 +// CHECK-64-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !46 +// CHECK-64-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !46 +// CHECK-64-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.6*, i32**)* @.omp_task_privates_map..13 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !46 +// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !46 +// CHECK-64-NEXT: store 
%struct.anon.4* [[TMP8]], %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !46 +// CHECK-64-NEXT: [[TMP12:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !46 +// CHECK-64-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !46 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !46 +// CHECK-64-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32**)* +// CHECK-64-NEXT: call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR3]] +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !46 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK-64-NEXT: [[CONV_I:%.*]] = bitcast i64* [[GLOBAL_CASTED_I]] to i32* +// CHECK-64-NEXT: store i32 [[TMP17]], i32* [[CONV_I]], align 4, !noalias !46 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i64, i64* [[GLOBAL_CASTED_I]], align 8, !noalias !46 +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l83(i64 [[TMP18]]) #[[ATTR3]] +// CHECK-64-NEXT: ret i32 0 +// CHECK-64-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK-64-SAME: () #[[ATTR7]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK-64-NEXT: ret void +// CHECK-32-LABEL: define {{[^@]+}}@_Z3fooi +// CHECK-32-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK-32-NEXT: [[B:%.*]] = alloca [10 x float], align 4 +// CHECK-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// CHECK-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 +// CHECK-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// 
CHECK-32-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 +// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK-32-NEXT: [[DOTDEP_ARR_ADDR:%.*]] = alloca [4 x %struct.kmp_depend_info], align 4 +// CHECK-32-NEXT: [[DEP_COUNTER_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[GLOBAL_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[AGG_CAPTURED4:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK-32-NEXT: [[DOTDEP_ARR_ADDR5:%.*]] = alloca [3 x %struct.kmp_depend_info], align 4 +// CHECK-32-NEXT: [[DEP_COUNTER_ADDR6:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[AGG_CAPTURED7:%.*]] = alloca [[STRUCT_ANON_0]], align 4 +// CHECK-32-NEXT: [[DOTDEP_ARR_ADDR8:%.*]] = alloca [3 x %struct.kmp_depend_info], align 4 +// CHECK-32-NEXT: [[DEP_COUNTER_ADDR9:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[GLOBAL_CASTED10:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[AGG_CAPTURED11:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 +// CHECK-32-NEXT: [[DOTDEP_ARR_ADDR12:%.*]] = alloca [1 x %struct.kmp_depend_info], align 4 +// CHECK-32-NEXT: [[DEP_COUNTER_ADDR13:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK-32-NEXT: store i16 0, i16* [[AA]], align 2 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave() +// CHECK-32-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 4 +// CHECK-32-NEXT: [[VLA:%.*]] = 
alloca float, i32 [[TMP1]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = mul nuw i32 5, [[TMP3]] +// CHECK-32-NEXT: [[VLA1:%.*]] = alloca double, i32 [[TMP4]], align 8 +// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[__VLA_EXPR1]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* @global, align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[TMP7]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i32 20, i32 4, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) +// CHECK-32-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to %struct.kmp_task_t_with_privates* +// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP10]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* +// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 4, i1 false) +// CHECK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x %struct.kmp_depend_info], [4 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP16:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO:%.*]], %struct.kmp_depend_info* [[TMP15]], i32 0 +// CHECK-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP16]], i32 0, i32 0 +// CHECK-32-NEXT: store i32 ptrtoint (i32* @global to i32), i32* [[TMP17]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP16]], i32 0, i32 1 +// CHECK-32-NEXT: store i32 4, i32* [[TMP18]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP16]], i32 0, i32 2 +// CHECK-32-NEXT: store i8 1, i8* [[TMP19]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = ptrtoint i32* [[A]] to i32 +// CHECK-32-NEXT: [[TMP21:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP15]], i32 1 +// CHECK-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP21]], i32 0, i32 0 +// CHECK-32-NEXT: store i32 [[TMP20]], i32* [[TMP22]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP21]], i32 0, i32 1 +// CHECK-32-NEXT: store i32 4, i32* [[TMP23]], align 4 +// CHECK-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP21]], i32 0, i32 2 +// CHECK-32-NEXT: store i8 3, i8* [[TMP24]], align 4 +// CHECK-32-NEXT: [[TMP25:%.*]] = ptrtoint [10 x float]* [[B]] to i32 +// CHECK-32-NEXT: [[TMP26:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP15]], i32 2 +// CHECK-32-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP26]], i32 0, i32 0 +// CHECK-32-NEXT: store i32 [[TMP25]], i32* [[TMP27]], align 4 +// CHECK-32-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP26]], i32 0, i32 1 +// CHECK-32-NEXT: store i32 40, i32* [[TMP28]], align 4 +// CHECK-32-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP26]], i32 0, i32 2 +// CHECK-32-NEXT: store i8 3, i8* [[TMP29]], align 4 +// CHECK-32-NEXT: [[TMP30:%.*]] = mul nsw i32 4, [[TMP3]] +// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[VLA1]], i32 [[TMP30]] +// CHECK-32-NEXT: [[TMP31:%.*]] = mul nuw i32 [[TMP3]], 8 +// CHECK-32-NEXT: [[TMP32:%.*]] = ptrtoint double* [[ARRAYIDX]] to i32 +// CHECK-32-NEXT: [[TMP33:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP15]], i32 3 +// CHECK-32-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP33]], i32 0, i32 0 +// CHECK-32-NEXT: store i32 [[TMP32]], i32* [[TMP34]], align 4 +// CHECK-32-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP33]], i32 0, i32 1 +// CHECK-32-NEXT: store i32 [[TMP31]], i32* [[TMP35]], align 4 +// CHECK-32-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], 
%struct.kmp_depend_info* [[TMP33]], i32 0, i32 2 +// CHECK-32-NEXT: store i8 3, i8* [[TMP36]], align 4 +// CHECK-32-NEXT: store i32 4, i32* [[DEP_COUNTER_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP37:%.*]] = bitcast %struct.kmp_depend_info* [[TMP15]] to i8* +// CHECK-32-NEXT: call void @__kmpc_omp_taskwait_deps_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 4, i8* [[TMP37]], i32 0, i8* null, i32 0) +// CHECK-32-NEXT: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i8* [[TMP9]]) +// CHECK-32-NEXT: [[TMP38:%.*]] = call i32 @.omp_task_entry.(i32 [[TMP0]], %struct.kmp_task_t_with_privates* [[TMP10]]) #[[ATTR3:[0-9]+]] +// CHECK-32-NEXT: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i8* [[TMP9]]) +// CHECK-32-NEXT: [[TMP39:%.*]] = load i32, i32* @global, align 4 +// CHECK-32-NEXT: [[TMP40:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] +// CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK-32-NEXT: [[TMP41:%.*]] = load i32*, i32** @_ZZ3fooiE6plocal, align 4 +// CHECK-32-NEXT: [[TMP42:%.*]] = load i32, i32* @global, align 4 +// CHECK-32-NEXT: store i32 [[TMP42]], i32* [[GLOBAL_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP43:%.*]] = load i32, i32* [[GLOBAL_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP44:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK-32: omp_if.then: +// CHECK-32-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP46:%.*]] = bitcast i8** [[TMP45]] to i32** +// CHECK-32-NEXT: store i32* [[TMP41]], i32** [[TMP46]], align 4 +// CHECK-32-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP48:%.*]] = bitcast i8** 
[[TMP47]] to i32** +// CHECK-32-NEXT: store i32* [[TMP41]], i32** [[TMP48]], align 4 +// CHECK-32-NEXT: [[TMP49:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK-32-NEXT: store i8* null, i8** [[TMP49]], align 4 +// CHECK-32-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP51:%.*]] = bitcast i8** [[TMP50]] to i32* +// CHECK-32-NEXT: store i32 [[TMP43]], i32* [[TMP51]], align 4 +// CHECK-32-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP53:%.*]] = bitcast i8** [[TMP52]] to i32* +// CHECK-32-NEXT: store i32 [[TMP43]], i32* [[TMP53]], align 4 +// CHECK-32-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK-32-NEXT: store i8* null, i8** [[TMP54]], align 4 +// CHECK-32-NEXT: [[TMP55:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP56:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED4]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP58:%.*]] = load i32*, i32** @_ZZ3fooiE6plocal, align 4 +// CHECK-32-NEXT: store i32* [[TMP58]], i32** [[TMP57]], align 4 +// CHECK-32-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED4]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP60:%.*]] = load i32, i32* @global, align 4 +// CHECK-32-NEXT: store i32 [[TMP60]], i32* [[TMP59]], align 4 +// CHECK-32-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED4]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP62:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK-32-NEXT: store i32 [[TMP62]], i32* [[TMP61]], align 4 +// CHECK-32-NEXT: [[TMP63:%.*]] = call 
i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i32 60, i32 12, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.1*)* @.omp_task_entry..6 to i32 (i32, i8*)*)) +// CHECK-32-NEXT: [[TMP64:%.*]] = bitcast i8* [[TMP63]] to %struct.kmp_task_t_with_privates.1* +// CHECK-32-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], %struct.kmp_task_t_with_privates.1* [[TMP64]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP65]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP67:%.*]] = load i8*, i8** [[TMP66]], align 4 +// CHECK-32-NEXT: [[TMP68:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED4]] to i8* +// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP67]], i8* align 4 [[TMP68]], i32 12, i1 false) +// CHECK-32-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1]], %struct.kmp_task_t_with_privates.1* [[TMP64]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP70:%.*]] = bitcast i8* [[TMP67]] to %struct.anon.0* +// CHECK-32-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP69]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP72:%.*]] = bitcast [2 x i64]* [[TMP71]] to i8* +// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP72]], i8* align 4 bitcast ([2 x i64]* @.offload_sizes to i8*), i32 16, i1 false) +// CHECK-32-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP69]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP74:%.*]] = load i32*, i32** @_ZZ3fooiE6plocal, align 4 +// CHECK-32-NEXT: store i32* [[TMP74]], i32** [[TMP73]], align 4 +// CHECK-32-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP69]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP76:%.*]] = load i32, i32* @global, align 4 +// CHECK-32-NEXT: store i32 [[TMP76]], i32* [[TMP75]], 
align 4 +// CHECK-32-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP69]], i32 0, i32 3 +// CHECK-32-NEXT: [[TMP78:%.*]] = bitcast [2 x i8*]* [[TMP77]] to i8* +// CHECK-32-NEXT: [[TMP79:%.*]] = bitcast i8** [[TMP55]] to i8* +// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP78]], i8* align 4 [[TMP79]], i32 8, i1 false) +// CHECK-32-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP69]], i32 0, i32 4 +// CHECK-32-NEXT: [[TMP81:%.*]] = bitcast [2 x i8*]* [[TMP80]] to i8* +// CHECK-32-NEXT: [[TMP82:%.*]] = bitcast i8** [[TMP56]] to i8* +// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP81]], i8* align 4 [[TMP82]], i32 8, i1 false) +// CHECK-32-NEXT: [[TMP83:%.*]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR5]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP84:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP83]], i32 0 +// CHECK-32-NEXT: [[TMP85:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP84]], i32 0, i32 0 +// CHECK-32-NEXT: store i32 ptrtoint (i32* @global to i32), i32* [[TMP85]], align 4 +// CHECK-32-NEXT: [[TMP86:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP84]], i32 0, i32 1 +// CHECK-32-NEXT: store i32 4, i32* [[TMP86]], align 4 +// CHECK-32-NEXT: [[TMP87:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP84]], i32 0, i32 2 +// CHECK-32-NEXT: store i8 3, i8* [[TMP87]], align 4 +// CHECK-32-NEXT: [[TMP88:%.*]] = ptrtoint i32* [[A]] to i32 +// CHECK-32-NEXT: [[TMP89:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP83]], i32 1 +// CHECK-32-NEXT: [[TMP90:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP89]], i32 0, i32 0 +// CHECK-32-NEXT: 
store i32 [[TMP88]], i32* [[TMP90]], align 4 +// CHECK-32-NEXT: [[TMP91:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP89]], i32 0, i32 1 +// CHECK-32-NEXT: store i32 4, i32* [[TMP91]], align 4 +// CHECK-32-NEXT: [[TMP92:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP89]], i32 0, i32 2 +// CHECK-32-NEXT: store i8 3, i8* [[TMP92]], align 4 +// CHECK-32-NEXT: [[TMP93:%.*]] = mul nuw i32 [[TMP1]], 4 +// CHECK-32-NEXT: [[TMP94:%.*]] = ptrtoint float* [[VLA]] to i32 +// CHECK-32-NEXT: [[TMP95:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP83]], i32 2 +// CHECK-32-NEXT: [[TMP96:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP95]], i32 0, i32 0 +// CHECK-32-NEXT: store i32 [[TMP94]], i32* [[TMP96]], align 4 +// CHECK-32-NEXT: [[TMP97:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP95]], i32 0, i32 1 +// CHECK-32-NEXT: store i32 [[TMP93]], i32* [[TMP97]], align 4 +// CHECK-32-NEXT: [[TMP98:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP95]], i32 0, i32 2 +// CHECK-32-NEXT: store i8 3, i8* [[TMP98]], align 4 +// CHECK-32-NEXT: store i32 3, i32* [[DEP_COUNTER_ADDR6]], align 4 +// CHECK-32-NEXT: [[TMP99:%.*]] = bitcast %struct.kmp_depend_info* [[TMP83]] to i8* +// CHECK-32-NEXT: call void @__kmpc_omp_taskwait_deps_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 3, i8* [[TMP99]], i32 0, i8* null, i32 0) +// CHECK-32-NEXT: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i8* [[TMP63]]) +// CHECK-32-NEXT: [[TMP100:%.*]] = call i32 @.omp_task_entry..6(i32 [[TMP0]], %struct.kmp_task_t_with_privates.1* [[TMP64]]) #[[ATTR3]] +// CHECK-32-NEXT: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i8* [[TMP63]]) +// CHECK-32-NEXT: br label [[OMP_IF_END:%.*]] +// CHECK-32: omp_if.else: 
+// CHECK-32-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED7]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP102:%.*]] = load i32*, i32** @_ZZ3fooiE6plocal, align 4 +// CHECK-32-NEXT: store i32* [[TMP102]], i32** [[TMP101]], align 4 +// CHECK-32-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED7]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP104:%.*]] = load i32, i32* @global, align 4 +// CHECK-32-NEXT: store i32 [[TMP104]], i32* [[TMP103]], align 4 +// CHECK-32-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED7]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP106:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK-32-NEXT: store i32 [[TMP106]], i32* [[TMP105]], align 4 +// CHECK-32-NEXT: [[TMP107:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i32 28, i32 12, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.2*)* @.omp_task_entry..9 to i32 (i32, i8*)*)) +// CHECK-32-NEXT: [[TMP108:%.*]] = bitcast i8* [[TMP107]] to %struct.kmp_task_t_with_privates.2* +// CHECK-32-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP108]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP109]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP111:%.*]] = load i8*, i8** [[TMP110]], align 4 +// CHECK-32-NEXT: [[TMP112:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED7]] to i8* +// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP111]], i8* align 4 [[TMP112]], i32 12, i1 false) +// CHECK-32-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], %struct.kmp_task_t_with_privates.2* [[TMP108]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP114:%.*]] = bitcast i8* [[TMP111]] to %struct.anon.0* +// CHECK-32-NEXT: [[TMP115:%.*]] = 
getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP113]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP116:%.*]] = load i32*, i32** @_ZZ3fooiE6plocal, align 4 +// CHECK-32-NEXT: store i32* [[TMP116]], i32** [[TMP115]], align 4 +// CHECK-32-NEXT: [[TMP117:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP113]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP118:%.*]] = load i32, i32* @global, align 4 +// CHECK-32-NEXT: store i32 [[TMP118]], i32* [[TMP117]], align 4 +// CHECK-32-NEXT: [[TMP119:%.*]] = getelementptr inbounds [3 x %struct.kmp_depend_info], [3 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR8]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP120:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP119]], i32 0 +// CHECK-32-NEXT: [[TMP121:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP120]], i32 0, i32 0 +// CHECK-32-NEXT: store i32 ptrtoint (i32* @global to i32), i32* [[TMP121]], align 4 +// CHECK-32-NEXT: [[TMP122:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP120]], i32 0, i32 1 +// CHECK-32-NEXT: store i32 4, i32* [[TMP122]], align 4 +// CHECK-32-NEXT: [[TMP123:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP120]], i32 0, i32 2 +// CHECK-32-NEXT: store i8 3, i8* [[TMP123]], align 4 +// CHECK-32-NEXT: [[TMP124:%.*]] = ptrtoint i32* [[A]] to i32 +// CHECK-32-NEXT: [[TMP125:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP119]], i32 1 +// CHECK-32-NEXT: [[TMP126:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP125]], i32 0, i32 0 +// CHECK-32-NEXT: store i32 [[TMP124]], i32* [[TMP126]], align 4 +// CHECK-32-NEXT: [[TMP127:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP125]], i32 0, i32 1 +// CHECK-32-NEXT: store i32 4, i32* [[TMP127]], align 4 
+// CHECK-32-NEXT: [[TMP128:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP125]], i32 0, i32 2 +// CHECK-32-NEXT: store i8 3, i8* [[TMP128]], align 4 +// CHECK-32-NEXT: [[TMP129:%.*]] = mul nuw i32 [[TMP1]], 4 +// CHECK-32-NEXT: [[TMP130:%.*]] = ptrtoint float* [[VLA]] to i32 +// CHECK-32-NEXT: [[TMP131:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP119]], i32 2 +// CHECK-32-NEXT: [[TMP132:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP131]], i32 0, i32 0 +// CHECK-32-NEXT: store i32 [[TMP130]], i32* [[TMP132]], align 4 +// CHECK-32-NEXT: [[TMP133:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP131]], i32 0, i32 1 +// CHECK-32-NEXT: store i32 [[TMP129]], i32* [[TMP133]], align 4 +// CHECK-32-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP131]], i32 0, i32 2 +// CHECK-32-NEXT: store i8 3, i8* [[TMP134]], align 4 +// CHECK-32-NEXT: store i32 3, i32* [[DEP_COUNTER_ADDR9]], align 4 +// CHECK-32-NEXT: [[TMP135:%.*]] = bitcast %struct.kmp_depend_info* [[TMP119]] to i8* +// CHECK-32-NEXT: call void @__kmpc_omp_taskwait_deps_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 3, i8* [[TMP135]], i32 0, i8* null, i32 0) +// CHECK-32-NEXT: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i8* [[TMP107]]) +// CHECK-32-NEXT: [[TMP136:%.*]] = call i32 @.omp_task_entry..9(i32 [[TMP0]], %struct.kmp_task_t_with_privates.2* [[TMP108]]) #[[ATTR3]] +// CHECK-32-NEXT: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i8* [[TMP107]]) +// CHECK-32-NEXT: br label [[OMP_IF_END]] +// CHECK-32: omp_if.end: +// CHECK-32-NEXT: [[TMP137:%.*]] = load i32, i32* @global, align 4 +// CHECK-32-NEXT: store i32 [[TMP137]], i32* [[GLOBAL_CASTED10]], align 4 +// CHECK-32-NEXT: [[TMP138:%.*]] = load i32, i32* [[GLOBAL_CASTED10]], 
align 4 +// CHECK-32-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[AGG_CAPTURED11]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP140:%.*]] = load i32, i32* @global, align 4 +// CHECK-32-NEXT: store i32 [[TMP140]], i32* [[TMP139]], align 4 +// CHECK-32-NEXT: [[TMP141:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i32 24, i32 4, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.5*)* @.omp_task_entry..14 to i32 (i32, i8*)*)) +// CHECK-32-NEXT: [[TMP142:%.*]] = bitcast i8* [[TMP141]] to %struct.kmp_task_t_with_privates.5* +// CHECK-32-NEXT: [[TMP143:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], %struct.kmp_task_t_with_privates.5* [[TMP142]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP144:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP143]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP145:%.*]] = load i8*, i8** [[TMP144]], align 4 +// CHECK-32-NEXT: [[TMP146:%.*]] = bitcast %struct.anon.4* [[AGG_CAPTURED11]] to i8* +// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP145]], i8* align 4 [[TMP146]], i32 4, i1 false) +// CHECK-32-NEXT: [[TMP147:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5]], %struct.kmp_task_t_with_privates.5* [[TMP142]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP148:%.*]] = bitcast i8* [[TMP145]] to %struct.anon.4* +// CHECK-32-NEXT: [[TMP149:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_6:%.*]], %struct..kmp_privates.t.6* [[TMP147]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP150:%.*]] = load i32, i32* @global, align 4 +// CHECK-32-NEXT: store i32 [[TMP150]], i32* [[TMP149]], align 4 +// CHECK-32-NEXT: [[TMP151:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], [1 x %struct.kmp_depend_info]* [[DOTDEP_ARR_ADDR12]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP152:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP151]], i32 0 +// CHECK-32-NEXT: 
[[TMP153:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP152]], i32 0, i32 0 +// CHECK-32-NEXT: store i32 ptrtoint (i32* @global to i32), i32* [[TMP153]], align 4 +// CHECK-32-NEXT: [[TMP154:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP152]], i32 0, i32 1 +// CHECK-32-NEXT: store i32 4, i32* [[TMP154]], align 4 +// CHECK-32-NEXT: [[TMP155:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], %struct.kmp_depend_info* [[TMP152]], i32 0, i32 2 +// CHECK-32-NEXT: store i8 3, i8* [[TMP155]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DEP_COUNTER_ADDR13]], align 4 +// CHECK-32-NEXT: [[TMP156:%.*]] = bitcast %struct.kmp_depend_info* [[TMP151]] to i8* +// CHECK-32-NEXT: call void @__kmpc_omp_taskwait_deps_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i8* [[TMP156]], i32 0, i8* null, i32 0) +// CHECK-32-NEXT: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i8* [[TMP141]]) +// CHECK-32-NEXT: [[TMP157:%.*]] = call i32 @.omp_task_entry..14(i32 [[TMP0]], %struct.kmp_task_t_with_privates.5* [[TMP142]]) #[[ATTR3]] +// CHECK-32-NEXT: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i8* [[TMP141]]) +// CHECK-32-NEXT: [[TMP158:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-32-NEXT: [[TMP159:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// CHECK-32-NEXT: call void @llvm.stackrestore(i8* [[TMP159]]) +// CHECK-32-NEXT: ret i32 [[TMP158]] +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l66 +// CHECK-32-SAME: () #[[ATTR2:[0-9]+]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// CHECK-32-NEXT: ret void +// CHECK-32-LABEL: define {{[^@]+}}@.omp_outlined. 
+// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: 
[[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]) +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// CHECK-32-LABEL: define {{[^@]+}}@.omp_outlined..1 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef 
[[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR2]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* 
[[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: 
omp.loop.exit: +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: ret void +// CHECK-32-LABEL: define {{[^@]+}}@.omp_task_entry. +// CHECK-32-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias noundef [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 4 +// CHECK-32-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 4 +// CHECK-32-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 4 +// CHECK-32-NEXT: [[KERNEL_ARGS_I:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// CHECK-32-NEXT: store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK-32-NEXT: [[TMP9:%.*]] = 
bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* +// CHECK-32-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK-32-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) +// CHECK-32-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META13:![0-9]+]]) +// CHECK-32-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) +// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !17 +// CHECK-32-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !17 +// CHECK-32-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !17 +// CHECK-32-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !17 +// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !17 +// CHECK-32-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !17 +// CHECK-32-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !17 +// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP10]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 0 +// CHECK-32-NEXT: store i32 2, i32* [[TMP14]], align 4, !noalias !17 +// CHECK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 1 +// CHECK-32-NEXT: store i32 0, i32* [[TMP15]], align 4, !noalias !17 +// CHECK-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 2 +// 
CHECK-32-NEXT: store i8** null, i8*** [[TMP16]], align 4, !noalias !17 +// CHECK-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 3 +// CHECK-32-NEXT: store i8** null, i8*** [[TMP17]], align 4, !noalias !17 +// CHECK-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 4 +// CHECK-32-NEXT: store i64* null, i64** [[TMP18]], align 4, !noalias !17 +// CHECK-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 5 +// CHECK-32-NEXT: store i64* null, i64** [[TMP19]], align 4, !noalias !17 +// CHECK-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 6 +// CHECK-32-NEXT: store i8** null, i8*** [[TMP20]], align 4, !noalias !17 +// CHECK-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 7 +// CHECK-32-NEXT: store i8** null, i8*** [[TMP21]], align 4, !noalias !17 +// CHECK-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 8 +// CHECK-32-NEXT: store i64 10, i64* [[TMP22]], align 8, !noalias !17 +// CHECK-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 9 +// CHECK-32-NEXT: store i64 0, i64* [[TMP23]], align 8, !noalias !17 +// CHECK-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 10 +// CHECK-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP24]], align 4, !noalias !17 +// CHECK-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 11 +// CHECK-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4, !noalias !17 +// CHECK-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 12 +// CHECK-32-NEXT: store i32 0, i32* [[TMP26]], align 4, !noalias !17 +// CHECK-32-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 [[TMP13]], i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l66.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]]) +// CHECK-32-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK-32-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__2_EXIT:%.*]] +// CHECK-32: omp_offload.failed.i: +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l66() #[[ATTR3]] +// CHECK-32-NEXT: br label [[DOTOMP_OUTLINED__2_EXIT]] +// CHECK-32: .omp_outlined..2.exit: +// CHECK-32-NEXT: ret i32 0 +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l76 +// CHECK-32-SAME: (i32* noundef [[PLOCAL:%.*]], i32 noundef [[GLOBAL:%.*]]) #[[ATTR2]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[PLOCAL_ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[GLOBAL_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[PLOCAL]], i32** [[PLOCAL_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[GLOBAL]], i32* [[GLOBAL_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PLOCAL_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[GLOBAL_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[GLOBAL_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[GLOBAL_CASTED]], align 4 +// CHECK-32-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, 
...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* [[TMP0]], i32 [[TMP2]]) +// CHECK-32-NEXT: ret void +// CHECK-32-LABEL: define {{[^@]+}}@.omp_outlined..3 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef [[PLOCAL:%.*]], i32 noundef [[GLOBAL:%.*]]) #[[ATTR2]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[PLOCAL_ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[GLOBAL_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[PLOCAL]], i32** [[PLOCAL_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[GLOBAL]], i32* [[GLOBAL_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PLOCAL_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load 
i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK-32-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK-32-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK-32: omp.precond.then: +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], 
[[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK-32-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32*, i32** [[PLOCAL_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[GLOBAL_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP17]], i32* [[GLOBAL_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[GLOBAL_CASTED]], align 4 +// CHECK-32-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], i32* [[TMP16]], i32 [[TMP18]]) +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP22]]) +// CHECK-32-NEXT: br label [[OMP_PRECOND_END]] +// CHECK-32: omp.precond.end: +// CHECK-32-NEXT: ret void +// CHECK-32-LABEL: define {{[^@]+}}@.omp_outlined..4 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef [[PLOCAL:%.*]], i32 noundef [[GLOBAL:%.*]]) #[[ATTR2]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[PLOCAL_ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: 
[[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[PLOCAL]], i32** [[PLOCAL_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[GLOBAL]], i32* [[GLOBAL_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PLOCAL_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK-32-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK-32-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK-32: omp.precond.then: +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* 
[[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: 
[[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK-32-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[GLOBAL_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32*, i32** [[PLOCAL_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP17]], i32* [[TMP18]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[GLOBAL_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP19]], i32* @_ZZ3fooiE6local1, align 4 +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK-32-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP22]]) +// CHECK-32-NEXT: br label [[OMP_PRECOND_END]] +// CHECK-32: omp.precond.end: +// CHECK-32-NEXT: ret void +// CHECK-32-LABEL: define {{[^@]+}}@.omp_task_privates_map. 
+// CHECK-32-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], i32*** noalias noundef [[TMP1:%.*]], i32** noalias noundef [[TMP2:%.*]], [2 x i8*]** noalias noundef [[TMP3:%.*]], [2 x i8*]** noalias noundef [[TMP4:%.*]], [2 x i64]** noalias noundef [[TMP5:%.*]]) #[[ATTR7:[0-9]+]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 4 +// CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca i32***, align 4 +// CHECK-32-NEXT: [[DOTADDR2:%.*]] = alloca i32**, align 4 +// CHECK-32-NEXT: [[DOTADDR3:%.*]] = alloca [2 x i8*]**, align 4 +// CHECK-32-NEXT: [[DOTADDR4:%.*]] = alloca [2 x i8*]**, align 4 +// CHECK-32-NEXT: [[DOTADDR5:%.*]] = alloca [2 x i64]**, align 4 +// CHECK-32-NEXT: store %struct..kmp_privates.t* [[TMP0]], %struct..kmp_privates.t** [[DOTADDR]], align 4 +// CHECK-32-NEXT: store i32*** [[TMP1]], i32**** [[DOTADDR1]], align 4 +// CHECK-32-NEXT: store i32** [[TMP2]], i32*** [[DOTADDR2]], align 4 +// CHECK-32-NEXT: store [2 x i8*]** [[TMP3]], [2 x i8*]*** [[DOTADDR3]], align 4 +// CHECK-32-NEXT: store [2 x i8*]** [[TMP4]], [2 x i8*]*** [[DOTADDR4]], align 4 +// CHECK-32-NEXT: store [2 x i64]** [[TMP5]], [2 x i64]*** [[DOTADDR5]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load %struct..kmp_privates.t*, %struct..kmp_privates.t** [[DOTADDR]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP8:%.*]] = load [2 x i64]**, [2 x i64]*** [[DOTADDR5]], align 4 +// CHECK-32-NEXT: store [2 x i64]* [[TMP7]], [2 x i64]** [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32***, i32**** [[DOTADDR1]], align 4 +// CHECK-32-NEXT: store i32** [[TMP9]], i32*** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], 
%struct..kmp_privates.t* [[TMP6]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32**, i32*** [[DOTADDR2]], align 4 +// CHECK-32-NEXT: store i32* [[TMP11]], i32** [[TMP12]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 3 +// CHECK-32-NEXT: [[TMP14:%.*]] = load [2 x i8*]**, [2 x i8*]*** [[DOTADDR3]], align 4 +// CHECK-32-NEXT: store [2 x i8*]* [[TMP13]], [2 x i8*]** [[TMP14]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load [2 x i8*]**, [2 x i8*]*** [[DOTADDR4]], align 4 +// CHECK-32-NEXT: store [2 x i8*]* [[TMP15]], [2 x i8*]** [[TMP16]], align 4 +// CHECK-32-NEXT: ret void +// CHECK-32-LABEL: define {{[^@]+}}@.omp_task_entry..6 +// CHECK-32-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.1* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 4 +// CHECK-32-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 4 +// CHECK-32-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK-32-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i32**, align 4 +// CHECK-32-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca [2 x i8*]*, align 4 +// CHECK-32-NEXT: [[DOTFIRSTPRIV_PTR_ADDR3_I:%.*]] = alloca [2 x i8*]*, align 4 +// CHECK-32-NEXT: [[DOTFIRSTPRIV_PTR_ADDR4_I:%.*]] = alloca [2 x i64]*, align 4 +// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_5_I:%.*]] = alloca i32, align 4 +// 
CHECK-32-NEXT: [[GLOBAL_CASTED_I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[KERNEL_ARGS_I:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.1*, align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// CHECK-32-NEXT: store %struct.kmp_task_t_with_privates.1* [[TMP1]], %struct.kmp_task_t_with_privates.1** [[DOTADDR1]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.1*, %struct.kmp_task_t_with_privates.1** [[DOTADDR1]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], %struct.kmp_task_t_with_privates.1* [[TMP3]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1]], %struct.kmp_task_t_with_privates.1* [[TMP3]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* +// CHECK-32-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.1* [[TMP3]] to i8* +// CHECK-32-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) +// CHECK-32-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) +// CHECK-32-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) +// CHECK-32-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) +// CHECK-32-NEXT: store 
i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !27 +// CHECK-32-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !27 +// CHECK-32-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !27 +// CHECK-32-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i32***, i32**, [2 x i8*]**, [2 x i8*]**, [2 x i64]**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !27 +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !27 +// CHECK-32-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 4, !noalias !27 +// CHECK-32-NEXT: [[TMP12:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 4, !noalias !27 +// CHECK-32-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !27 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !27 +// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32***, i32**, [2 x i8*]**, [2 x i8*]**, [2 x i64]**)* +// CHECK-32-NEXT: call void [[TMP15]](i8* [[TMP14]], i32*** [[DOTFIRSTPRIV_PTR_ADDR_I]], i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [2 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [2 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], [2 x i64]** [[DOTFIRSTPRIV_PTR_ADDR4_I]]) #[[ATTR3]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32**, i32*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !27 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !27 +// CHECK-32-NEXT: [[TMP18:%.*]] = load [2 x i8*]*, [2 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !27 +// CHECK-32-NEXT: [[TMP19:%.*]] = load [2 x i8*]*, [2 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !27 +// CHECK-32-NEXT: [[TMP20:%.*]] = load [2 x i64]*, [2 x i64]** [[DOTFIRSTPRIV_PTR_ADDR4_I]], align 4, !noalias !27 
+// CHECK-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP18]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP19]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[TMP20]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP12]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK-32-NEXT: [[TMP26:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK-32-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP16]], align 4 +// CHECK-32-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 +// CHECK-32-NEXT: store i32 [[TMP28]], i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !27 +// CHECK-32-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !27 +// CHECK-32-NEXT: [[SUB6_I:%.*]] = sub nsw i32 [[TMP29]], 1 +// CHECK-32-NEXT: store i32 [[SUB6_I]], i32* [[DOTCAPTURE_EXPR_5_I]], align 4, !noalias !27 +// CHECK-32-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5_I]], align 4, !noalias !27 +// CHECK-32-NEXT: [[ADD_I:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK-32-NEXT: [[TMP31:%.*]] = zext i32 [[ADD_I]] to i64 +// CHECK-32-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 0 +// CHECK-32-NEXT: store i32 2, i32* [[TMP32]], align 4, !noalias !27 +// CHECK-32-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 1 +// CHECK-32-NEXT: store i32 2, i32* [[TMP33]], align 4, !noalias !27 +// CHECK-32-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 2 +// CHECK-32-NEXT: store i8** [[TMP21]], i8*** [[TMP34]], align 4, !noalias !27 +// CHECK-32-NEXT: [[TMP35:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 3 +// CHECK-32-NEXT: store i8** [[TMP22]], i8*** [[TMP35]], align 4, !noalias !27 +// CHECK-32-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 4 +// CHECK-32-NEXT: store i64* [[TMP23]], i64** [[TMP36]], align 4, !noalias !27 +// CHECK-32-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 5 +// CHECK-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP37]], align 4, !noalias !27 +// CHECK-32-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 6 +// CHECK-32-NEXT: store i8** null, i8*** [[TMP38]], align 4, !noalias !27 +// CHECK-32-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 7 +// CHECK-32-NEXT: store i8** null, i8*** [[TMP39]], align 4, !noalias !27 +// CHECK-32-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 8 +// CHECK-32-NEXT: store i64 [[TMP31]], i64* [[TMP40]], align 8, !noalias !27 +// CHECK-32-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 9 +// CHECK-32-NEXT: store i64 0, i64* [[TMP41]], align 8, !noalias !27 +// CHECK-32-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 10 +// CHECK-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP42]], align 4, !noalias !27 +// CHECK-32-NEXT: [[TMP43:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 11 +// CHECK-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP43]], align 4, !noalias !27 +// CHECK-32-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]], i32 0, i32 12 +// CHECK-32-NEXT: store i32 0, i32* [[TMP44]], align 4, !noalias !27 +// CHECK-32-NEXT: [[TMP45:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB3]], i64 [[TMP26]], i32 0, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l76.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS_I]]) +// CHECK-32-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK-32-NEXT: br i1 [[TMP46]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__5_EXIT:%.*]] +// CHECK-32: omp_offload.failed.i: +// CHECK-32-NEXT: [[TMP47:%.*]] = load i32*, i32** [[TMP16]], align 4 +// CHECK-32-NEXT: [[TMP48:%.*]] = load i32, i32* @global, align 4, !noalias !27 +// CHECK-32-NEXT: store i32 [[TMP48]], i32* [[GLOBAL_CASTED_I]], align 4, !noalias !27 +// CHECK-32-NEXT: [[TMP49:%.*]] = load i32, i32* [[GLOBAL_CASTED_I]], align 4, !noalias !27 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l76(i32* [[TMP47]], i32 [[TMP49]]) #[[ATTR3]] +// CHECK-32-NEXT: br label [[DOTOMP_OUTLINED__5_EXIT]] +// CHECK-32: .omp_outlined..5.exit: +// CHECK-32-NEXT: ret i32 0 +// CHECK-32-LABEL: define {{[^@]+}}@.omp_task_privates_map..8 +// CHECK-32-SAME: (%struct..kmp_privates.t.3* noalias noundef [[TMP0:%.*]], i32*** noalias noundef [[TMP1:%.*]], i32** noalias noundef [[TMP2:%.*]]) #[[ATTR7]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.3*, align 4 +// CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca i32***, align 4 +// CHECK-32-NEXT: [[DOTADDR2:%.*]] = alloca i32**, align 4 +// CHECK-32-NEXT: store %struct..kmp_privates.t.3* [[TMP0]], %struct..kmp_privates.t.3** 
[[DOTADDR]], align 4 +// CHECK-32-NEXT: store i32*** [[TMP1]], i32**** [[DOTADDR1]], align 4 +// CHECK-32-NEXT: store i32** [[TMP2]], i32*** [[DOTADDR2]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load %struct..kmp_privates.t.3*, %struct..kmp_privates.t.3** [[DOTADDR]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP3]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32***, i32**** [[DOTADDR1]], align 4 +// CHECK-32-NEXT: store i32** [[TMP4]], i32*** [[TMP5]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP3]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32**, i32*** [[DOTADDR2]], align 4 +// CHECK-32-NEXT: store i32* [[TMP6]], i32** [[TMP7]], align 4 +// CHECK-32-NEXT: ret void +// CHECK-32-LABEL: define {{[^@]+}}@.omp_task_entry..9 +// CHECK-32-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.2* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 4 +// CHECK-32-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 4 +// CHECK-32-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK-32-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i32**, align 4 +// CHECK-32-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[GLOBAL_CASTED_I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// CHECK-32-NEXT: store %struct.kmp_task_t_with_privates.2* 
[[TMP1]], %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.3* [[TMP9]] to i8* +// CHECK-32-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.2* [[TMP3]] to i8* +// CHECK-32-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META28:![0-9]+]]) +// CHECK-32-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META31:![0-9]+]]) +// CHECK-32-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META33:![0-9]+]]) +// CHECK-32-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META35:![0-9]+]]) +// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !37 +// CHECK-32-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !37 +// CHECK-32-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !37 +// CHECK-32-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.3*, i32***, i32**)* @.omp_task_privates_map..8 to void (i8*, ...)*), void (i8*, ...)** 
[[DOTCOPY_FN__ADDR_I]], align 4, !noalias !37 +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !37 +// CHECK-32-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 4, !noalias !37 +// CHECK-32-NEXT: [[TMP12:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 4, !noalias !37 +// CHECK-32-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !37 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !37 +// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32***, i32**)* +// CHECK-32-NEXT: call void [[TMP15]](i8* [[TMP14]], i32*** [[DOTFIRSTPRIV_PTR_ADDR_I]], i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]]) #[[ATTR3]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32**, i32*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !37 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !37 +// CHECK-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP12]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32*, i32** [[TMP16]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* @global, align 4, !noalias !37 +// CHECK-32-NEXT: store i32 [[TMP20]], i32* [[GLOBAL_CASTED_I]], align 4, !noalias !37 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[GLOBAL_CASTED_I]], align 4, !noalias !37 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l76(i32* [[TMP19]], i32 [[TMP21]]) #[[ATTR3]] +// CHECK-32-NEXT: ret i32 0 +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l83 +// CHECK-32-SAME: (i32 noundef [[GLOBAL:%.*]]) #[[ATTR2]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[GLOBAL_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32 [[GLOBAL]], i32* 
[[GLOBAL_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[GLOBAL_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[GLOBAL_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[GLOBAL_CASTED]], align 4 +// CHECK-32-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i32 [[TMP1]]) +// CHECK-32-NEXT: ret void +// CHECK-32-LABEL: define {{[^@]+}}@.omp_outlined..10 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[GLOBAL:%.*]]) #[[ATTR2]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[GLOBAL_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[GLOBAL]], i32* [[GLOBAL_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* 
[[GLOBAL_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK-32-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK-32-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK-32: omp.precond.then: +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* 
[[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK-32-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[GLOBAL_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP15]], i32* [[GLOBAL_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[GLOBAL_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK-32-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP18]]) +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK-32-NEXT: call void @.omp_outlined..11(i32* [[TMP19]], i32* [[DOTBOUND_ZERO_ADDR]], i32 [[TMP13]], i32 [[TMP14]], i32 [[TMP16]]) #[[ATTR3]] +// CHECK-32-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP18]]) +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// 
CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP23]]) +// CHECK-32-NEXT: br label [[OMP_PRECOND_END]] +// CHECK-32: omp.precond.end: +// CHECK-32-NEXT: ret void +// CHECK-32-LABEL: define {{[^@]+}}@.omp_outlined..11 +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[GLOBAL:%.*]]) #[[ATTR2]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// 
CHECK-32-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[GLOBAL]], i32* [[GLOBAL_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[GLOBAL_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK-32-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK-32-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]] +// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK-32: omp.precond.then: +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: 
[[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-32: cond.true: +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-32-NEXT: br label [[COND_END:%.*]] +// CHECK-32: cond.false: +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: br label [[COND_END]] +// CHECK-32: cond.end: +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32: omp.inner.for.cond: +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32: omp.inner.for.body: +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[GLOBAL_ADDR]], align 4 +// CHECK-32-NEXT: [[ADD6:%.*]] = add nsw i32 
[[TMP16]], 1 +// CHECK-32-NEXT: store i32 [[ADD6]], i32* [[GLOBAL_ADDR]], align 4 +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-32: omp.body.continue: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-32: omp.inner.for.inc: +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK-32-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-32: omp.inner.for.end: +// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-32: omp.loop.exit: +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP19]]) +// CHECK-32-NEXT: br label [[OMP_PRECOND_END]] +// CHECK-32: omp.precond.end: +// CHECK-32-NEXT: ret void +// CHECK-32-LABEL: define {{[^@]+}}@.omp_task_privates_map..13 +// CHECK-32-SAME: (%struct..kmp_privates.t.6* noalias noundef [[TMP0:%.*]], i32** noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.6*, align 4 +// CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca i32**, align 4 +// CHECK-32-NEXT: store %struct..kmp_privates.t.6* [[TMP0]], %struct..kmp_privates.t.6** [[DOTADDR]], align 4 +// CHECK-32-NEXT: store i32** [[TMP1]], i32*** [[DOTADDR1]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load %struct..kmp_privates.t.6*, %struct..kmp_privates.t.6** [[DOTADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_6:%.*]], %struct..kmp_privates.t.6* [[TMP2]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[DOTADDR1]], align 4 +// CHECK-32-NEXT: store i32* [[TMP3]], i32** [[TMP4]], align 4 +// CHECK-32-NEXT: ret void +// CHECK-32-LABEL: define {{[^@]+}}@.omp_task_entry..14 +// 
CHECK-32-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.5* noalias noundef [[TMP1:%.*]]) #[[ATTR4]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 4 +// CHECK-32-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 4 +// CHECK-32-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK-32-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[GLOBAL_CASTED_I:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.5*, align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// CHECK-32-NEXT: store %struct.kmp_task_t_with_privates.5* [[TMP1]], %struct.kmp_task_t_with_privates.5** [[DOTADDR1]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.5*, %struct.kmp_task_t_with_privates.5** [[DOTADDR1]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], %struct.kmp_task_t_with_privates.5* [[TMP3]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.4* +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5]], %struct.kmp_task_t_with_privates.5* [[TMP3]], i32 0, i32 1 +// CHECK-32-NEXT: 
[[TMP10:%.*]] = bitcast %struct..kmp_privates.t.6* [[TMP9]] to i8* +// CHECK-32-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.5* [[TMP3]] to i8* +// CHECK-32-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META38:![0-9]+]]) +// CHECK-32-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META41:![0-9]+]]) +// CHECK-32-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META43:![0-9]+]]) +// CHECK-32-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META45:![0-9]+]]) +// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !47 +// CHECK-32-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !47 +// CHECK-32-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !47 +// CHECK-32-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.6*, i32**)* @.omp_task_privates_map..13 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !47 +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !47 +// CHECK-32-NEXT: store %struct.anon.4* [[TMP8]], %struct.anon.4** [[__CONTEXT_ADDR_I]], align 4, !noalias !47 +// CHECK-32-NEXT: [[TMP12:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR_I]], align 4, !noalias !47 +// CHECK-32-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !47 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !47 +// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32**)* +// CHECK-32-NEXT: call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR3]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !47 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK-32-NEXT: store i32 [[TMP17]], i32* [[GLOBAL_CASTED_I]], 
align 4, !noalias !47 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[GLOBAL_CASTED_I]], align 4, !noalias !47 +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l83(i32 [[TMP18]]) #[[ATTR3]] +// CHECK-32-NEXT: ret i32 0 +// CHECK-32-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK-32-SAME: () #[[ATTR7]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK-32-NEXT: ret void +// TCHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l66 +// TCHECK-64-SAME: () #[[ATTR0:[0-9]+]] { +// TCHECK-64-NEXT: entry: +// TCHECK-64-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// TCHECK-64-NEXT: ret void +// TCHECK-64-LABEL: define {{[^@]+}}@.omp_outlined. +// TCHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// TCHECK-64-NEXT: entry: +// TCHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// TCHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// TCHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// TCHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// TCHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// TCHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// TCHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// TCHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// TCHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// TCHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// TCHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// TCHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// TCHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// 
TCHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// TCHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// TCHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// TCHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// TCHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// TCHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// TCHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// TCHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// TCHECK-64: cond.true: +// TCHECK-64-NEXT: br label [[COND_END:%.*]] +// TCHECK-64: cond.false: +// TCHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// TCHECK-64-NEXT: br label [[COND_END]] +// TCHECK-64: cond.end: +// TCHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// TCHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// TCHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// TCHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// TCHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// TCHECK-64: omp.inner.for.cond: +// TCHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// TCHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// TCHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// TCHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// TCHECK-64: omp.inner.for.body: +// TCHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// TCHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// TCHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// TCHECK-64-NEXT: [[TMP10:%.*]] = zext i32 
[[TMP9]] to i64 +// TCHECK-64-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// TCHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// TCHECK-64: omp.inner.for.inc: +// TCHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// TCHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// TCHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// TCHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// TCHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// TCHECK-64: omp.inner.for.end: +// TCHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// TCHECK-64: omp.loop.exit: +// TCHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// TCHECK-64-NEXT: ret void +// TCHECK-64-LABEL: define {{[^@]+}}@.omp_outlined..1 +// TCHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +// TCHECK-64-NEXT: entry: +// TCHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// TCHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// TCHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// TCHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// TCHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// TCHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// TCHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// TCHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// TCHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// TCHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], 
i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// TCHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// TCHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// TCHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// TCHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// TCHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// TCHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// TCHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// TCHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// TCHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// TCHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// TCHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// TCHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// TCHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// TCHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// TCHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// TCHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// TCHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// TCHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// TCHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// TCHECK-64: cond.true: +// TCHECK-64-NEXT: br label [[COND_END:%.*]] +// TCHECK-64: cond.false: +// TCHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// TCHECK-64-NEXT: br label [[COND_END]] +// TCHECK-64: cond.end: +// TCHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// TCHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// TCHECK-64-NEXT: 
[[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// TCHECK-64-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// TCHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// TCHECK-64: omp.inner.for.cond: +// TCHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// TCHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// TCHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// TCHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// TCHECK-64: omp.inner.for.body: +// TCHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// TCHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// TCHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// TCHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// TCHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// TCHECK-64: omp.body.continue: +// TCHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// TCHECK-64: omp.inner.for.inc: +// TCHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// TCHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// TCHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// TCHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// TCHECK-64: omp.inner.for.end: +// TCHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// TCHECK-64: omp.loop.exit: +// TCHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// TCHECK-64-NEXT: ret void +// TCHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l76 +// TCHECK-64-SAME: (i64* noundef [[PLOCAL:%.*]], i64 noundef [[GLOBAL:%.*]]) #[[ATTR0]] { +// TCHECK-64-NEXT: entry: +// TCHECK-64-NEXT: [[PLOCAL_ADDR:%.*]] = alloca i64*, align 8 +// TCHECK-64-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[GLOBAL_CASTED:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: store i64* [[PLOCAL]], i64** [[PLOCAL_ADDR]], align 8 +// TCHECK-64-NEXT: store i64 [[GLOBAL]], 
i64* [[GLOBAL_ADDR]], align 8 +// TCHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[GLOBAL_ADDR]] to i32* +// TCHECK-64-NEXT: [[TMP0:%.*]] = load i64*, i64** [[PLOCAL_ADDR]], align 8 +// TCHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// TCHECK-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[GLOBAL_CASTED]] to i32* +// TCHECK-64-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 +// TCHECK-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[GLOBAL_CASTED]], align 8 +// TCHECK-64-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64*, i64)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i64* [[TMP0]], i64 [[TMP2]]) +// TCHECK-64-NEXT: ret void +// TCHECK-64-LABEL: define {{[^@]+}}@.omp_outlined..2 +// TCHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64* noundef [[PLOCAL:%.*]], i64 noundef [[GLOBAL:%.*]]) #[[ATTR0]] { +// TCHECK-64-NEXT: entry: +// TCHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// TCHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// TCHECK-64-NEXT: [[PLOCAL_ADDR:%.*]] = alloca i64*, align 8 +// TCHECK-64-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// TCHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// TCHECK-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// TCHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// TCHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// TCHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// TCHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// TCHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// TCHECK-64-NEXT: [[I4:%.*]] = alloca i32, align 4 +// TCHECK-64-NEXT: [[GLOBAL_CASTED:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: 
store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// TCHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// TCHECK-64-NEXT: store i64* [[PLOCAL]], i64** [[PLOCAL_ADDR]], align 8 +// TCHECK-64-NEXT: store i64 [[GLOBAL]], i64* [[GLOBAL_ADDR]], align 8 +// TCHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[GLOBAL_ADDR]] to i32* +// TCHECK-64-NEXT: [[TMP0:%.*]] = load i64*, i64** [[PLOCAL_ADDR]], align 8 +// TCHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 8 +// TCHECK-64-NEXT: store i64 [[TMP1]], i64* [[DOTCAPTURE_EXPR_]], align 8 +// TCHECK-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_]], align 8 +// TCHECK-64-NEXT: [[SUB:%.*]] = sub nsw i64 [[TMP2]], 0 +// TCHECK-64-NEXT: [[DIV:%.*]] = sdiv i64 [[SUB]], 1 +// TCHECK-64-NEXT: [[CONV2:%.*]] = trunc i64 [[DIV]] to i32 +// TCHECK-64-NEXT: [[SUB3:%.*]] = sub nsw i32 [[CONV2]], 1 +// TCHECK-64-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// TCHECK-64-NEXT: store i32 0, i32* [[I]], align 4 +// TCHECK-64-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_]], align 8 +// TCHECK-64-NEXT: [[CMP:%.*]] = icmp slt i64 0, [[TMP3]] +// TCHECK-64-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// TCHECK-64: omp.precond.then: +// TCHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// TCHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// TCHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// TCHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// TCHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// TCHECK-64-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// TCHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// TCHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* 
[[DOTOMP_STRIDE]], i32 1, i32 1) +// TCHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// TCHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// TCHECK-64-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// TCHECK-64-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// TCHECK-64: cond.true: +// TCHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// TCHECK-64-NEXT: br label [[COND_END:%.*]] +// TCHECK-64: cond.false: +// TCHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// TCHECK-64-NEXT: br label [[COND_END]] +// TCHECK-64: cond.end: +// TCHECK-64-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// TCHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// TCHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// TCHECK-64-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// TCHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// TCHECK-64: omp.inner.for.cond: +// TCHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// TCHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// TCHECK-64-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// TCHECK-64-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// TCHECK-64: omp.inner.for.body: +// TCHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// TCHECK-64-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// TCHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// TCHECK-64-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// TCHECK-64-NEXT: [[TMP18:%.*]] = load i64*, i64** [[PLOCAL_ADDR]], align 8 +// TCHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 +// TCHECK-64-NEXT: [[CONV7:%.*]] = bitcast i64* [[GLOBAL_CASTED]] to i32* +// TCHECK-64-NEXT: store i32 [[TMP19]], i32* 
[[CONV7]], align 4 +// TCHECK-64-NEXT: [[TMP20:%.*]] = load i64, i64* [[GLOBAL_CASTED]], align 8 +// TCHECK-64-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64* [[TMP18]], i64 [[TMP20]]) +// TCHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// TCHECK-64: omp.inner.for.inc: +// TCHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// TCHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// TCHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// TCHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// TCHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// TCHECK-64: omp.inner.for.end: +// TCHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// TCHECK-64: omp.loop.exit: +// TCHECK-64-NEXT: [[TMP23:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// TCHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP23]], align 4 +// TCHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP24]]) +// TCHECK-64-NEXT: br label [[OMP_PRECOND_END]] +// TCHECK-64: omp.precond.end: +// TCHECK-64-NEXT: ret void +// TCHECK-64-LABEL: define {{[^@]+}}@.omp_outlined..3 +// TCHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64* noundef [[PLOCAL:%.*]], i64 noundef [[GLOBAL:%.*]]) #[[ATTR0]] { +// TCHECK-64-NEXT: entry: +// TCHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// TCHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// TCHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[PLOCAL_ADDR:%.*]] = alloca i64*, 
align 8 +// TCHECK-64-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// TCHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// TCHECK-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// TCHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// TCHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// TCHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// TCHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// TCHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// TCHECK-64-NEXT: [[I6:%.*]] = alloca i32, align 4 +// TCHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// TCHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// TCHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// TCHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// TCHECK-64-NEXT: store i64* [[PLOCAL]], i64** [[PLOCAL_ADDR]], align 8 +// TCHECK-64-NEXT: store i64 [[GLOBAL]], i64* [[GLOBAL_ADDR]], align 8 +// TCHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[GLOBAL_ADDR]] to i32* +// TCHECK-64-NEXT: [[TMP0:%.*]] = load i64*, i64** [[PLOCAL_ADDR]], align 8 +// TCHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 8 +// TCHECK-64-NEXT: store i64 [[TMP1]], i64* [[DOTCAPTURE_EXPR_]], align 8 +// TCHECK-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_]], align 8 +// TCHECK-64-NEXT: [[SUB:%.*]] = sub nsw i64 [[TMP2]], 0 +// TCHECK-64-NEXT: [[DIV:%.*]] = sdiv i64 [[SUB]], 1 +// TCHECK-64-NEXT: [[CONV2:%.*]] = trunc i64 [[DIV]] to i32 +// TCHECK-64-NEXT: [[SUB3:%.*]] = sub nsw i32 [[CONV2]], 1 +// TCHECK-64-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// TCHECK-64-NEXT: store i32 0, i32* [[I]], align 4 +// TCHECK-64-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_]], align 8 +// 
TCHECK-64-NEXT: [[CMP:%.*]] = icmp slt i64 0, [[TMP3]] +// TCHECK-64-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// TCHECK-64: omp.precond.then: +// TCHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// TCHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// TCHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// TCHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// TCHECK-64-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP5]] to i32 +// TCHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// TCHECK-64-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP6]] to i32 +// TCHECK-64-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 +// TCHECK-64-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// TCHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// TCHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// TCHECK-64-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// TCHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// TCHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// TCHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// TCHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// TCHECK-64-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// TCHECK-64-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// TCHECK-64: cond.true: +// TCHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// TCHECK-64-NEXT: br label [[COND_END:%.*]] +// TCHECK-64: cond.false: +// TCHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// TCHECK-64-NEXT: br label [[COND_END]] +// TCHECK-64: cond.end: +// TCHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 
[[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// TCHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// TCHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// TCHECK-64-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// TCHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// TCHECK-64: omp.inner.for.cond: +// TCHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// TCHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// TCHECK-64-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// TCHECK-64-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// TCHECK-64: omp.inner.for.body: +// TCHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// TCHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// TCHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// TCHECK-64-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 +// TCHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4 +// TCHECK-64-NEXT: [[CONV9:%.*]] = sext i32 [[TMP17]] to i64 +// TCHECK-64-NEXT: [[TMP18:%.*]] = load i64*, i64** [[PLOCAL_ADDR]], align 8 +// TCHECK-64-NEXT: store i64 [[CONV9]], i64* [[TMP18]], align 8 +// TCHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 +// TCHECK-64-NEXT: store i32 [[TMP19]], i32* @_ZZ3fooiE6local1, align 4 +// TCHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// TCHECK-64: omp.body.continue: +// TCHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// TCHECK-64: omp.inner.for.inc: +// TCHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// TCHECK-64-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 +// TCHECK-64-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// TCHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// TCHECK-64: omp.inner.for.end: +// TCHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// TCHECK-64: omp.loop.exit: +// TCHECK-64-NEXT: [[TMP21:%.*]] = load i32*, i32** 
[[DOTGLOBAL_TID__ADDR]], align 8 +// TCHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// TCHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP22]]) +// TCHECK-64-NEXT: br label [[OMP_PRECOND_END]] +// TCHECK-64: omp.precond.end: +// TCHECK-64-NEXT: ret void +// TCHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l66 +// TCHECK-32-SAME: () #[[ATTR0:[0-9]+]] { +// TCHECK-32-NEXT: entry: +// TCHECK-32-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// TCHECK-32-NEXT: ret void +// TCHECK-32-LABEL: define {{[^@]+}}@.omp_outlined. +// TCHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// TCHECK-32-NEXT: entry: +// TCHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// TCHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// TCHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// TCHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// TCHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// TCHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// TCHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// TCHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// TCHECK-32-NEXT: [[TMP0:%.*]] = load i32*, 
i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// TCHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// TCHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// TCHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// TCHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// TCHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// TCHECK-32: cond.true: +// TCHECK-32-NEXT: br label [[COND_END:%.*]] +// TCHECK-32: cond.false: +// TCHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// TCHECK-32-NEXT: br label [[COND_END]] +// TCHECK-32: cond.end: +// TCHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// TCHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// TCHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// TCHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// TCHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// TCHECK-32: omp.inner.for.cond: +// TCHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// TCHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// TCHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// TCHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// TCHECK-32: omp.inner.for.body: +// TCHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// TCHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// TCHECK-32-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]]) +// TCHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// TCHECK-32: omp.inner.for.inc: +// TCHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// TCHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// TCHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// TCHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// TCHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// TCHECK-32: omp.inner.for.end: +// TCHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// TCHECK-32: omp.loop.exit: +// TCHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// TCHECK-32-NEXT: ret void +// TCHECK-32-LABEL: define {{[^@]+}}@.omp_outlined..1 +// TCHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +// TCHECK-32-NEXT: entry: +// TCHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// TCHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// TCHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// TCHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** 
[[DOTBOUND_TID__ADDR]], align 4 +// TCHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// TCHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// TCHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// TCHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// TCHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// TCHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// TCHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 +// TCHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// TCHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// TCHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// TCHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// TCHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// TCHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// TCHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// TCHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// TCHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// TCHECK-32: cond.true: +// TCHECK-32-NEXT: br label [[COND_END:%.*]] +// TCHECK-32: cond.false: +// TCHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// TCHECK-32-NEXT: br label [[COND_END]] +// TCHECK-32: cond.end: +// TCHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// TCHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// TCHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// TCHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// TCHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// TCHECK-32: omp.inner.for.cond: +// 
TCHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// TCHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// TCHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// TCHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// TCHECK-32: omp.inner.for.body: +// TCHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// TCHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// TCHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// TCHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// TCHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// TCHECK-32: omp.body.continue: +// TCHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// TCHECK-32: omp.inner.for.inc: +// TCHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// TCHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// TCHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// TCHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// TCHECK-32: omp.inner.for.end: +// TCHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// TCHECK-32: omp.loop.exit: +// TCHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// TCHECK-32-NEXT: ret void +// TCHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l76 +// TCHECK-32-SAME: (i32* noundef [[PLOCAL:%.*]], i32 noundef [[GLOBAL:%.*]]) #[[ATTR0]] { +// TCHECK-32-NEXT: entry: +// TCHECK-32-NEXT: [[PLOCAL_ADDR:%.*]] = alloca i32*, align 4 +// TCHECK-32-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[GLOBAL_CASTED:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: store i32* [[PLOCAL]], i32** [[PLOCAL_ADDR]], align 4 +// TCHECK-32-NEXT: store i32 [[GLOBAL]], i32* [[GLOBAL_ADDR]], align 4 +// TCHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PLOCAL_ADDR]], align 4 +// TCHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[GLOBAL_ADDR]], align 4 +// TCHECK-32-NEXT: store i32 
[[TMP1]], i32* [[GLOBAL_CASTED]], align 4 +// TCHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[GLOBAL_CASTED]], align 4 +// TCHECK-32-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* [[TMP0]], i32 [[TMP2]]) +// TCHECK-32-NEXT: ret void +// TCHECK-32-LABEL: define {{[^@]+}}@.omp_outlined..2 +// TCHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef [[PLOCAL:%.*]], i32 noundef [[GLOBAL:%.*]]) #[[ATTR0]] { +// TCHECK-32-NEXT: entry: +// TCHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// TCHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// TCHECK-32-NEXT: [[PLOCAL_ADDR:%.*]] = alloca i32*, align 4 +// TCHECK-32-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[I3:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[GLOBAL_CASTED:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// TCHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// TCHECK-32-NEXT: store i32* [[PLOCAL]], i32** [[PLOCAL_ADDR]], align 4 +// TCHECK-32-NEXT: store i32 [[GLOBAL]], i32* [[GLOBAL_ADDR]], align 4 +// TCHECK-32-NEXT: [[TMP0:%.*]] = load 
i32*, i32** [[PLOCAL_ADDR]], align 4 +// TCHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// TCHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// TCHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// TCHECK-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// TCHECK-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// TCHECK-32-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// TCHECK-32-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// TCHECK-32-NEXT: store i32 0, i32* [[I]], align 4 +// TCHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// TCHECK-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// TCHECK-32-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// TCHECK-32: omp.precond.then: +// TCHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// TCHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// TCHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// TCHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// TCHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// TCHECK-32-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// TCHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// TCHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// TCHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// TCHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// TCHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// TCHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// TCHECK-32: cond.true: +// TCHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// TCHECK-32-NEXT: br label 
[[COND_END:%.*]] +// TCHECK-32: cond.false: +// TCHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// TCHECK-32-NEXT: br label [[COND_END]] +// TCHECK-32: cond.end: +// TCHECK-32-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// TCHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// TCHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// TCHECK-32-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// TCHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// TCHECK-32: omp.inner.for.cond: +// TCHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// TCHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// TCHECK-32-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// TCHECK-32-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// TCHECK-32: omp.inner.for.body: +// TCHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// TCHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// TCHECK-32-NEXT: [[TMP16:%.*]] = load i32*, i32** [[PLOCAL_ADDR]], align 4 +// TCHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[GLOBAL_ADDR]], align 4 +// TCHECK-32-NEXT: store i32 [[TMP17]], i32* [[GLOBAL_CASTED]], align 4 +// TCHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[GLOBAL_CASTED]], align 4 +// TCHECK-32-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], i32* [[TMP16]], i32 [[TMP18]]) +// TCHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// TCHECK-32: omp.inner.for.inc: +// TCHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// TCHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// TCHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// TCHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// TCHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// TCHECK-32: omp.inner.for.end: +// TCHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// TCHECK-32: omp.loop.exit: +// TCHECK-32-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// TCHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// TCHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP22]]) +// TCHECK-32-NEXT: br label [[OMP_PRECOND_END]] +// TCHECK-32: omp.precond.end: +// TCHECK-32-NEXT: ret void +// TCHECK-32-LABEL: define {{[^@]+}}@.omp_outlined..3 +// TCHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32* noundef [[PLOCAL:%.*]], i32 noundef [[GLOBAL:%.*]]) #[[ATTR0]] { +// TCHECK-32-NEXT: entry: +// TCHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// TCHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// TCHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[PLOCAL_ADDR:%.*]] = alloca i32*, align 4 +// TCHECK-32-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 
+// TCHECK-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: [[I3:%.*]] = alloca i32, align 4 +// TCHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// TCHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// TCHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// TCHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// TCHECK-32-NEXT: store i32* [[PLOCAL]], i32** [[PLOCAL_ADDR]], align 4 +// TCHECK-32-NEXT: store i32 [[GLOBAL]], i32* [[GLOBAL_ADDR]], align 4 +// TCHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[PLOCAL_ADDR]], align 4 +// TCHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// TCHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// TCHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// TCHECK-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// TCHECK-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// TCHECK-32-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// TCHECK-32-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// TCHECK-32-NEXT: store i32 0, i32* [[I]], align 4 +// TCHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// TCHECK-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// TCHECK-32-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// TCHECK-32: omp.precond.then: +// TCHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// TCHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], 
align 4 +// TCHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// TCHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// TCHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// TCHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// TCHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// TCHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// TCHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// TCHECK-32-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// TCHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// TCHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// TCHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// TCHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// TCHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// TCHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// TCHECK-32: cond.true: +// TCHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// TCHECK-32-NEXT: br label [[COND_END:%.*]] +// TCHECK-32: cond.false: +// TCHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// TCHECK-32-NEXT: br label [[COND_END]] +// TCHECK-32: cond.end: +// TCHECK-32-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// TCHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// TCHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// TCHECK-32-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// TCHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// TCHECK-32: omp.inner.for.cond: +// TCHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// 
TCHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// TCHECK-32-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// TCHECK-32-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// TCHECK-32: omp.inner.for.body: +// TCHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// TCHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// TCHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// TCHECK-32-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// TCHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[GLOBAL_ADDR]], align 4 +// TCHECK-32-NEXT: [[TMP18:%.*]] = load i32*, i32** [[PLOCAL_ADDR]], align 4 +// TCHECK-32-NEXT: store i32 [[TMP17]], i32* [[TMP18]], align 4 +// TCHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[GLOBAL_ADDR]], align 4 +// TCHECK-32-NEXT: store i32 [[TMP19]], i32* @_ZZ3fooiE6local1, align 4 +// TCHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// TCHECK-32: omp.body.continue: +// TCHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// TCHECK-32: omp.inner.for.inc: +// TCHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// TCHECK-32-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// TCHECK-32-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// TCHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] +// TCHECK-32: omp.inner.for.end: +// TCHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// TCHECK-32: omp.loop.exit: +// TCHECK-32-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// TCHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 4 +// TCHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP22]]) +// TCHECK-32-NEXT: br label [[OMP_PRECOND_END]] +// TCHECK-32: omp.precond.end: +// TCHECK-32-NEXT: ret void +// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l66 +// CHECK-SAME: () #[[ATTR2:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void (ptr, i32, ptr, 
...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l66.omp_outlined) +// CHECK-NEXT: ret void +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK-SAME: () #[[ATTR7:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK-NEXT: ret void +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// TCHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l66 +// TCHECK-SAME: () #[[ATTR0:[0-9]+]] { +// TCHECK-NEXT: entry: +// TCHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l66.omp_outlined) +// TCHECK-NEXT: ret void +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +// +//// NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +// SIMD-ONLY0: {{.*}} +// SIMD-ONLY1: {{.*}} diff --git a/clang/test/OpenMP/target_teams_generic_loop_if_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_if_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/target_teams_generic_loop_if_codegen.cpp @@ -0,0 +1,1576 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK1 + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" + +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1 +// RUN: 
%clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK1 + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" + +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple aarch64-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple aarch64-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple aarch64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK1 + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple aarch64-unknown-unknown -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple aarch64-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple 
aarch64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" + +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple aarch64-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1 +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple aarch64-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple aarch64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK1 + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple aarch64-unknown-unknown -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple aarch64-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple aarch64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +void fn1(); +void fn2(); +void fn3(); +void fn4(); +void fn5(); +void fn6(); + +int Arg; +// Two regions: one without an if clause, one with if (parallel: false) whose body calls gtid_test() again. +void gtid_test() { +#pragma omp target teams loop + for(int i = 0 ; i < 100; i++) {} + +#pragma omp target teams loop if (parallel: false) + for(int i = 0 ; i < 100; i++) { + gtid_test(); + } +} + +// Template variant: if (true), if (false), and if (parallel: Arg) on the function parameter; keep this on a single line so later pragma line numbers do not move. +template <typename T> +int tmain(T Arg) { +#pragma omp target teams loop if (true) + for(int i = 0 ; i < 100; i++) { + fn1(); + } +#pragma omp target teams loop if (false) + for(int i = 0 ; i < 100; i++) { + fn2(); + } +#pragma omp target teams loop if (parallel: Arg) + for(int i = 0 ; i < 100; i++) { + fn3(); + } + return 0; +} +// Non-template variant: if (true), if (false), and an unmodified if (Arg) reading the global Arg; then forwards Arg to tmain. +int main() { +#pragma omp target teams loop if (true) + for(int i = 0 ; i < 100; i++) { + + +
fn4(); + } + +#pragma omp target teams loop if (false) + for(int i = 0 ; i < 100; i++) { + + + fn5(); + } + +#pragma omp target teams loop if (Arg) + for(int i = 0 ; i < 100; i++) { + + + fn6(); + } + + return tmain(Arg); +} + + + + + + +// call void [[T_OUTLINE_FUN_3:@.+]]( + +#endif +// CHECK1-LABEL: define {{[^@]+}}@_Z9gtid_testv +// CHECK1-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[KERNEL_ARGS2:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 2, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 0, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr null, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 100, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, ptr [[TMP12]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l48.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1: omp_offload.failed: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l48() #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK1: omp_offload.cont: +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0 +// CHECK1-NEXT: store i32 2, ptr [[TMP15]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 1 +// CHECK1-NEXT: store i32 0, ptr [[TMP16]], align 4 +// 
CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 3 +// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 5 +// CHECK1-NEXT: store ptr null, ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 8 +// CHECK1-NEXT: store i64 100, ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP25]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP26]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 12 +// CHECK1-NEXT: 
store i32 0, ptr [[TMP27]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l51.region_id, ptr [[KERNEL_ARGS2]]) +// CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK1-NEXT: br i1 [[TMP29]], label [[OMP_OFFLOAD_FAILED3:%.*]], label [[OMP_OFFLOAD_CONT4:%.*]] +// CHECK1: omp_offload.failed3: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l51() #[[ATTR2]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT4]] +// CHECK1: omp_offload.cont4: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l48 +// CHECK1-SAME: () #[[ATTR1:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l48.omp_outlined) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l48.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr 
[[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// 
CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l48.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l48.omp_outlined.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 
8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l51 +// CHECK1-SAME: () #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l51.omp_outlined) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l51.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: 
+// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l51.omp_outlined.omp_outlined(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// 
CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l51.omp_outlined.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 
+// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr 
[[I]], align 4 +// CHECK1-NEXT: call void @_Z9gtid_testv() +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@main +// CHECK1-SAME: () #[[ATTR3:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[_TMP5:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 2, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 0, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 
0, i32 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr null, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 100, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, ptr [[TMP12]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr 
@.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l76.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1: omp_offload.failed: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l76() #[[ATTR2]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK1: omp_offload.cont: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83() #[[ATTR2]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr @Arg, align 4 +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK1-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK1: omp_if.then: +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], 
i32 0, i32 0 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP24]] to i1 +// CHECK1-NEXT: [[TMP25:%.*]] = select i1 [[TOBOOL4]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP25]], 0 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 +// CHECK1-NEXT: store i32 2, ptr [[TMP27]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP28]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP22]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS6]], i32 0, i32 8 +// CHECK1-NEXT: store i64 100, ptr [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP37]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] [[TMP26]], ptr [[TMP38]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, ptr [[TMP39]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP25]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90.region_id, ptr [[KERNEL_ARGS6]]) +// CHECK1-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK1-NEXT: br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]] +// CHECK1: omp_offload.failed7: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90(i64 [[TMP17]]) #[[ATTR2]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT8]] +// CHECK1: omp_offload.cont8: +// CHECK1-NEXT: br label [[OMP_IF_END:%.*]] +// CHECK1: omp_if.else: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90(i64 [[TMP17]]) #[[ATTR2]] +// CHECK1-NEXT: br label [[OMP_IF_END]] +// CHECK1: omp_if.end: +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr @Arg, align 4 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiEiT_(i32 noundef [[TMP42]]) +// CHECK1-NEXT: ret i32 [[CALL]] +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l76 +// CHECK1-SAME: () #[[ATTR1]] { +// CHECK1-NEXT: entry: +// 
CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l76.omp_outlined) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l76.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label 
[[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l76.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l76.omp_outlined.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr 
[[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK1-NEXT: call void @_Z3fn4v() +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83 +// CHECK1-SAME: () #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: 
br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined.omp_outlined(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: store i32 
[[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = 
trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK1-NEXT: 
call void @_Z3fn5v() +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90 +// CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90.omp_outlined, i64 [[TMP1]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK1: omp_if.then: +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: br label [[OMP_IF_END:%.*]] +// CHECK1: omp_if.else: +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90.omp_outlined.omp_outlined(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: br label [[OMP_IF_END]] +// CHECK1: omp_if.end: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90.omp_outlined.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = 
alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label 
[[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK1-NEXT: call void @_Z3fn6v() +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_Z5tmainIiEiT_ +// CHECK1-SAME: (i32 noundef [[ARG:%.*]]) #[[ATTR0]] comdat { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = 
alloca i32, align 4 +// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK1-NEXT: store i32 [[ARG]], ptr [[ARG_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 2, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 0, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr null, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 100, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, ptr [[TMP12]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l60.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1: omp_offload.failed: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l60() #[[ATTR2]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK1: omp_offload.cont: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l64() #[[ATTR2]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// 
CHECK1-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK1-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP23]] to i1 +// CHECK1-NEXT: [[TMP24:%.*]] = select i1 [[TOBOOL3]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP25:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP24]], 0 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 +// CHECK1-NEXT: store i32 2, ptr [[TMP26]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP27]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP22]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.1, ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8 +// CHECK1-NEXT: store i64 100, ptr [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, ptr [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP36]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] [[TMP25]], ptr [[TMP37]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, ptr [[TMP38]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP24]], ptr 
@.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.region_id, ptr [[KERNEL_ARGS5]]) +// CHECK1-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK1-NEXT: br i1 [[TMP40]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]] +// CHECK1: omp_offload.failed6: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68(i64 [[TMP17]]) #[[ATTR2]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT7]] +// CHECK1: omp_offload.cont7: +// CHECK1-NEXT: ret i32 0 +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l60 +// CHECK1-SAME: () #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l60.omp_outlined) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l60.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr 
[[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l60.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l60.omp_outlined.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], 
ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: 
[[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK1-NEXT: call void @_Z3fn1v() +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l64 +// CHECK1-SAME: () #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l64.omp_outlined) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l64.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: 
cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l64.omp_outlined.omp_outlined(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], 
[[TMP13]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l64.omp_outlined.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// 
CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK1-NEXT: call void @_Z3fn2v() +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68 +// CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.omp_outlined, i64 [[TMP1]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK1: omp_if.then: +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: br label [[OMP_IF_END:%.*]] +// CHECK1: omp_if.else: +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.omp_outlined.omp_outlined(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: br label [[OMP_IF_END]] +// CHECK1: omp_if.end: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.omp_outlined.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: 
[[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// 
CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK1-NEXT: call void @_Z3fn3v() +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK1-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void @__tgt_register_requires(i64 1) +// 
CHECK1-NEXT: ret void +// diff --git a/clang/test/OpenMP/target_teams_generic_loop_order_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_order_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/target_teams_generic_loop_order_codegen.cpp @@ -0,0 +1,207 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK1 + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// REQUIRES: powerpc-registered-target + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +void gtid_test() { +#pragma omp target teams loop order(concurrent) + for(int i = 0 ; i < 100; i++) {} +} + + + + 
+#endif +// CHECK1-LABEL: define {{[^@]+}}@_Z9gtid_testv +// CHECK1-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 2, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 0, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr null, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 100, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// 
CHECK1-NEXT: store i64 0, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, ptr [[TMP12]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l16.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1: omp_offload.failed: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l16() #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK1: omp_offload.cont: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l16 +// CHECK1-SAME: () #[[ATTR1:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l16.omp_outlined) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l16.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: 
cond.false: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l16.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l16.omp_outlined.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 
[[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK1-SAME: () #[[ATTR3:[0-9]+]] section ".text.startup" { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK1-NEXT: ret void +// diff --git a/clang/test/OpenMP/target_teams_generic_loop_private_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_private_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/target_teams_generic_loop_private_codegen.cpp @@ -0,0 +1,3124 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature 
--include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix=CHECK1 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix=CHECK1 +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix=CHECK3 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix=CHECK3 + +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix=CHECK5 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix=CHECK5 + +// RUN: 
%clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" + +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" + +// Test target codegen - host bc file has to be created first. 
(no significant differences with host version of target region) +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix=CHECK13 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix=CHECK13 +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix=CHECK15 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix=CHECK15 + +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 
-triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix=CHECK17 + +// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: 
%clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +struct St { // Helper with user-provided ctors/dtor; default second argument of S's copy constructor. + int a, b; + St() : a(0), b(0) {} + St(const St &st) : a(st.a + st.b), b(0) {} + ~St() {} +}; + +volatile int g = 1212; +volatile int &g1 = g; + +template <typename T> +struct S { // Class type with user-provided ctors/dtor; objects of it are privatized in the loops below. + T f; + S(T a) : f(a + g) {} + S() : f(g) {} + S(const S &s, St t = St()) : f(s.f + t.a) {} + operator T() { return T(); } + ~S() {} +}; + + +template <typename T> +T tmain() { // Template case: privatizes a scalar, an array, an array of S<T> and a reference to S<T>. + S<T> test; + T t_var = T(); + T vec[] = {1, 2}; + S<T> s_arr[] = {1, 2}; + S<T> &var = test; +#pragma omp target teams loop private(t_var, vec, s_arr, var) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + return T(); +} + +// HCHECK-DAG: [[TEST:@.+]] ={{.*}} global [[S_FLOAT_TY]] zeroinitializer, +S<float> test; +// HCHECK-DAG: [[T_VAR:@.+]] ={{.+}} global i{{[0-9]+}} 333, +int t_var = 333; +// HCHECK-DAG: [[VEC:@.+]] ={{.+}} global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2], +int vec[] = {1, 2}; +// HCHECK-DAG: [[S_ARR:@.+]] ={{.+}} global [2 x [[S_FLOAT_TY]]] zeroinitializer, +S<float> s_arr[] = {1, 2}; +// HCHECK-DAG: [[VAR:@.+]] ={{.+}} global [[S_FLOAT_TY]] zeroinitializer, +S<float> var(3); +// HCHECK-DAG: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0, + +int main() { // Non-template case; with LAMBDA defined, the construct is nested inside a lambda instead. + static int sivar; +#ifdef LAMBDA + [&]() { +#pragma omp
target teams loop private(g, g1, sivar) + for (int i = 0; i < 2; ++i) { + + // Skip global, bound tid and loop vars + + g = 1; + g1 = 1; + sivar = 2; + + // Skip global, bound tid and loop vars + [&]() { + g = 2; + g1 = 2; + sivar = 4; + + }(); + } + }(); + return 0; +#else +#pragma omp target teams loop private(t_var, vec, s_arr, var, sivar) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + sivar += i; + } + return tmain<int>(); +#endif +} + +// HCHECK: define {{.*}}i{{[0-9]+}} @main() +// HCHECK: call i32 @__tgt_target_teams_mapper(ptr @{{.+}}, i64 -1, ptr @{{[^,]+}}, i32 0, ptr null, ptr null, {{.+}} null, {{.+}} null, ptr null, ptr null, i32 0, i32 0) +// HCHECK: call void @[[OFFL1:.+]]() +// HCHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]() +// HCHECK: ret + +// HCHECK: define{{.*}} void @[[OFFL1]]() + +// Skip global, bound tid and loop vars + +// private(s_arr) + +// private(var) + + +// Skip global, bound tid and loop vars + +// private(s_arr) + +// private(var) + + +// HCHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]() +// HCHECK: call i32 @__tgt_target_teams_mapper(ptr @{{.+}}, i64 -1, ptr @{{[^,]+}}, i32 0, +// HCHECK: call void @[[TOFFL1:.+]]() +// HCHECK: ret + +// HCHECK: define {{.*}}void @[[TOFFL1]]() + +// Skip global, bound tid and loop vars + +// private(s_arr) + + +// private(var) + + +// Skip global, bound tid and loop vars +// prev lb and ub +// iter variables + +// private(s_arr) + + +// private(var) + + + +#endif +// CHECK1-LABEL: define {{[^@]+}}@__cxx_global_var_init +// CHECK1-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) @test) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN1SIfED1Ev, ptr @test, ptr @__dso_handle) #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr
#[[ATTR1:[0-9]+]] comdat align 2 { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIfEC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfED1Ev +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIfED2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float +// CHECK1-NEXT: store float [[CONV]], ptr [[F]], align 4 +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfED2Ev +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// 
CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@__cxx_global_var_init.1 +// CHECK1-SAME: () #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) @s_arr, float noundef 1.000000e+00) +// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) getelementptr inbounds ([[STRUCT_S:%.*]], ptr @s_arr, i64 1), float noundef 2.000000e+00) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @__cxx_global_array_dtor, ptr null, ptr @__dso_handle) #[[ATTR2]] +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ef +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], float noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store float [[A]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: call void @_ZN1SIfEC2Ef(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]], float noundef [[TMP0]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@__cxx_global_array_dtor +// CHECK1-SAME: (ptr noundef [[TMP0:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK1: arraydestroy.body: +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ getelementptr inbounds ([[STRUCT_S:%.*]], ptr @s_arr, i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr 
[[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], @s_arr +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done1: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ef +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], float noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store float [[A]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to float +// CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP0]], [[CONV]] +// CHECK1-NEXT: store float [[ADD]], ptr [[F]], align 4 +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@__cxx_global_var_init.2 +// CHECK1-SAME: () #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) @var, float noundef 3.000000e+00) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN1SIfED1Ev, ptr @var, ptr @__dso_handle) #[[ATTR2]] +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@main +// CHECK1-SAME: () #[[ATTR3:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// 
CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 2, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 0, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr null, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 2, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, ptr [[TMP12]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1: omp_offload.failed: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124() #[[ATTR2]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK1: omp_offload.cont: +// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiET_v() +// CHECK1-NEXT: ret i32 [[CALL]] +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124 +// CHECK1-SAME: () #[[ATTR4:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] +// CHECK1: arrayctor.loop: +// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] 
], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] +// CHECK1: arrayctor.cont: +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1: omp.inner.for.cond.cleanup: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]]) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 +// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK1: arraydestroy.body: +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = 
getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done3: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr 
[[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] +// CHECK1: arrayctor.loop: +// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] +// CHECK1: arrayctor.cont: +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr 
[[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1: omp.inner.for.cond.cleanup: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP12]] to 
i64 +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM3]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4 +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]]) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK1: arraydestroy.body: +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) 
[[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done8: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_Z5tmainIiET_v +// CHECK1-SAME: () #[[ATTR6:[0-9]+]] comdat { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) +// CHECK1-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 +// CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// 
CHECK1-NEXT: store i32 2, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 0, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr null, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 2, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK1-NEXT: store 
[3 x i32] zeroinitializer, ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, ptr [[TMP12]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1: omp_offload.failed: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80() #[[ATTR2]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK1: omp_offload.cont: +// CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK1: arraydestroy.body: +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done2: +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP16]] +// +// 
+// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80 +// CHECK1-SAME: () #[[ATTR4]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: br 
label [[ARRAYCTOR_LOOP:%.*]] +// CHECK1: arrayctor.loop: +// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] +// CHECK1: arrayctor.cont: +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1: omp.inner.for.cond.cleanup: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP14]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i64 2 +// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK1: arraydestroy.body: +// 
CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done5: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[I:%.*]] = 
alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] +// CHECK1: arrayctor.loop: +// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] +// CHECK1: arrayctor.cont: +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 
dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1: omp.inner.for.cond.cleanup: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr 
[[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP12]], i64 4, i1 false) +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK1: arraydestroy.body: +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: 
[[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done9: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiED2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, 
align 4 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[F]], align 4 +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_target_teams_generic_loop_private_codegen.cpp +// CHECK1-SAME: () #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void @__cxx_global_var_init() +// CHECK1-NEXT: call void @__cxx_global_var_init.1() +// CHECK1-NEXT: call void @__cxx_global_var_init.2() +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK1-SAME: () #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK1-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@__cxx_global_var_init +// CHECK3-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) @test) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN1SIfED1Ev, ptr @test, ptr @__dso_handle) #[[ATTR2:[0-9]+]] +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev +// CHECK3-SAME: 
(ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: call void @_ZN1SIfEC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIfED1Ev +// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: call void @_ZN1SIfED2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev +// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float +// CHECK3-NEXT: store float [[CONV]], ptr [[F]], align 4 +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIfED2Ev +// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 
+// CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@__cxx_global_var_init.1 +// CHECK3-SAME: () #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) @s_arr, float noundef 1.000000e+00) +// CHECK3-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) getelementptr inbounds ([[STRUCT_S:%.*]], ptr @s_arr, i32 1), float noundef 2.000000e+00) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @__cxx_global_array_dtor, ptr null, ptr @__dso_handle) #[[ATTR2]] +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ef +// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], float noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store float [[A]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: call void @_ZN1SIfEC2Ef(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]], float noundef [[TMP0]]) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@__cxx_global_array_dtor +// CHECK3-SAME: (ptr noundef [[TMP0:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 +// CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK3: arraydestroy.body: +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ getelementptr inbounds ([[STRUCT_S:%.*]], ptr @s_arr, i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: 
[[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], @s_arr +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done1: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ef +// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], float noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store float [[A]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to float +// CHECK3-NEXT: [[ADD:%.*]] = fadd float [[TMP0]], [[CONV]] +// CHECK3-NEXT: store float [[ADD]], ptr [[F]], align 4 +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@__cxx_global_var_init.2 +// CHECK3-SAME: () #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) @var, float noundef 3.000000e+00) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN1SIfED1Ev, ptr @var, ptr @__dso_handle) #[[ATTR2]] +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@main +// CHECK3-SAME: () #[[ATTR3:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, 
align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 2, ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 0, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr null, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr null, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 2, ptr [[TMP8]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK3-NEXT: store i64 0, ptr [[TMP9]], align 8 +// 
CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK3-NEXT: store i32 0, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK3-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3: omp_offload.failed: +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124() #[[ATTR2]] +// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK3: omp_offload.cont: +// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() +// CHECK3-NEXT: ret i32 [[CALL]] +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124 +// CHECK3-SAME: () #[[ATTR4:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] +// CHECK3: arrayctor.loop: +// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] 
], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] +// CHECK3: arrayctor.cont: +// CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3: omp.inner.for.cond.cleanup: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined.omp_outlined, i32 [[TMP7]], i32 [[TMP8]]) +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]]) +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2 +// CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK3: arraydestroy.body: +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr 
noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done3: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined.omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store 
i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] +// CHECK3: arrayctor.loop: +// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] +// CHECK3: arrayctor.cont: +// CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// 
CHECK3: cond.true: +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3: omp.inner.for.cond.cleanup: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] 
= add nsw i32 [[TMP14]], [[TMP13]] +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4 +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK3: omp.body.continue: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]]) +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 +// CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK3: arraydestroy.body: +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE6:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done6: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_Z5tmainIiET_v +// CHECK3-SAME: () #[[ATTR6:[0-9]+]] comdat { 
+// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) +// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK3-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK3-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 +// CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 2, ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 0, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: 
[[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr null, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr null, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 2, ptr [[TMP8]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK3-NEXT: store i64 0, ptr [[TMP9]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK3-NEXT: store i32 0, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.region_id, ptr 
[[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK3-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3: omp_offload.failed: +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80() #[[ATTR2]] +// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK3: omp_offload.cont: +// CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK3: arraydestroy.body: +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done2: +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK3-NEXT: ret i32 [[TMP16]] +// +// +// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev +// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: call void 
@_ZN1SIiEC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei +// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: call void @_ZN1SIiEC2Ei(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80 +// CHECK3-SAME: () #[[ATTR4]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: br 
label [[ARRAYCTOR_LOOP:%.*]] +// CHECK3: arrayctor.loop: +// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] +// CHECK3: arrayctor.cont: +// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3: omp.inner.for.cond.cleanup: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined.omp_outlined, i32 [[TMP7]], i32 [[TMP8]]) +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP12]]) +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i32 2 +// CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK3: arraydestroy.body: +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], 
[[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done5: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined.omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: 
store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] +// CHECK3: arrayctor.loop: +// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] +// CHECK3: arrayctor.cont: +// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: call 
void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3: omp.inner.for.cond.cleanup: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 4 
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP13]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP12]], i32 4, i1 false) +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK3: omp.body.continue: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]]) +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK3: arraydestroy.body: +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label 
[[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done7: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev +// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: call void @_ZN1SIiED2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev +// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK3-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei +// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: 
[[TMP1:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[F]], align 4 +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev +// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_target_teams_generic_loop_private_codegen.cpp +// CHECK3-SAME: () #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: call void @__cxx_global_var_init() +// CHECK3-NEXT: call void @__cxx_global_var_init.1() +// CHECK3-NEXT: call void @__cxx_global_var_init.2() +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK3-SAME: () #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK3-NEXT: ret void +// +// +// CHECK5-LABEL: define {{[^@]+}}@__cxx_global_var_init +// CHECK5-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) @test) +// CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN1SIfED1Ev, ptr @test, ptr @__dso_handle) #[[ATTR2:[0-9]+]] +// CHECK5-NEXT: ret void +// +// +// CHECK5-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev +// CHECK5-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: call void @_ZN1SIfEC2Ev(ptr noundef 
nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK5-NEXT: ret void +// +// +// CHECK5-LABEL: define {{[^@]+}}@_ZN1SIfED1Ev +// CHECK5-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: call void @_ZN1SIfED2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] +// CHECK5-NEXT: ret void +// +// +// CHECK5-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev +// CHECK5-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK5-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float +// CHECK5-NEXT: store float [[CONV]], ptr [[F]], align 4 +// CHECK5-NEXT: ret void +// +// +// CHECK5-LABEL: define {{[^@]+}}@_ZN1SIfED2Ev +// CHECK5-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: ret void +// +// +// CHECK5-LABEL: define {{[^@]+}}@__cxx_global_var_init.1 +// CHECK5-SAME: () #[[ATTR0]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) @s_arr, float noundef 1.000000e+00) +// CHECK5-NEXT: call void 
@_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) getelementptr inbounds ([[STRUCT_S:%.*]], ptr @s_arr, i64 1), float noundef 2.000000e+00) +// CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @__cxx_global_array_dtor, ptr null, ptr @__dso_handle) #[[ATTR2]] +// CHECK5-NEXT: ret void +// +// +// CHECK5-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ef +// CHECK5-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], float noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store float [[A]], ptr [[A_ADDR]], align 4 +// CHECK5-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4 +// CHECK5-NEXT: call void @_ZN1SIfEC2Ef(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]], float noundef [[TMP0]]) +// CHECK5-NEXT: ret void +// +// +// CHECK5-LABEL: define {{[^@]+}}@__cxx_global_array_dtor +// CHECK5-SAME: (ptr noundef [[TMP0:%.*]]) #[[ATTR0]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 +// CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK5: arraydestroy.body: +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ getelementptr inbounds ([[STRUCT_S:%.*]], ptr @s_arr, i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], @s_arr +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label 
[[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done1: +// CHECK5-NEXT: ret void +// +// +// CHECK5-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ef +// CHECK5-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], float noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store float [[A]], ptr [[A_ADDR]], align 4 +// CHECK5-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK5-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to float +// CHECK5-NEXT: [[ADD:%.*]] = fadd float [[TMP0]], [[CONV]] +// CHECK5-NEXT: store float [[ADD]], ptr [[F]], align 4 +// CHECK5-NEXT: ret void +// +// +// CHECK5-LABEL: define {{[^@]+}}@__cxx_global_var_init.2 +// CHECK5-SAME: () #[[ATTR0]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) @var, float noundef 3.000000e+00) +// CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN1SIfED1Ev, ptr @var, ptr @__dso_handle) #[[ATTR2]] +// CHECK5-NEXT: ret void +// +// +// CHECK5-LABEL: define {{[^@]+}}@main +// CHECK5-SAME: () #[[ATTR3:[0-9]+]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 1 +// CHECK5-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// CHECK5-NEXT: call void @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 1 dereferenceable(1) [[REF_TMP]]) +// CHECK5-NEXT: ret i32 0 +// +// +// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104 +// 
CHECK5-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.omp_outlined) +// CHECK5-NEXT: ret void +// +// +// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.omp_outlined +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr undef, ptr [[_TMP1]], align 8 +// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK5-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK5: cond.true: +// CHECK5-NEXT: br label [[COND_END:%.*]] +// CHECK5: cond.false: +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: br label [[COND_END]] +// CHECK5: cond.end: +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK5: omp.inner.for.cond: +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5: omp.inner.for.body: +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK5: omp.inner.for.inc: +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK5: omp.inner.for.end: +// CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK5: omp.loop.exit: +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK5-NEXT: ret void +// +// +// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.omp_outlined.omp_outlined +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR5]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// 
CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr undef, ptr [[_TMP1]], align 8 +// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK5: cond.true: +// CHECK5-NEXT: br label [[COND_END:%.*]] +// CHECK5: cond.false: +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: br label [[COND_END]] +// CHECK5: cond.end: +// CHECK5-NEXT: 
[[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK5: omp.inner.for.cond: +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5: omp.inner.for.body: +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK5-NEXT: store i32 1, ptr [[G]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK5-NEXT: store volatile i32 1, ptr [[TMP10]], align 4 +// CHECK5-NEXT: store i32 2, ptr [[SIVAR]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[G]], ptr [[TMP11]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK5-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) +// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK5: omp.body.continue: +// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK5: omp.inner.for.inc: 
+// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK5-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK5: omp.inner.for.end: +// CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK5: omp.loop.exit: +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK5-NEXT: ret void +// +// +// CHECK5-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_target_teams_generic_loop_private_codegen.cpp +// CHECK5-SAME: () #[[ATTR0]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: call void @__cxx_global_var_init() +// CHECK5-NEXT: call void @__cxx_global_var_init.1() +// CHECK5-NEXT: call void @__cxx_global_var_init.2() +// CHECK5-NEXT: ret void +// +// +// CHECK5-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK5-SAME: () #[[ATTR0]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK5-NEXT: ret void +// +// +// CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124 +// CHECK13-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK13-NEXT: entry: +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined) +// CHECK13-NEXT: ret void +// +// +// CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK13-NEXT: entry: +// CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK13-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] +// CHECK13: arrayctor.loop: +// CHECK13-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi 
ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK13-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4:[0-9]+]] +// CHECK13-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK13-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK13-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] +// CHECK13: arrayctor.cont: +// CHECK13-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK13: cond.true: +// CHECK13-NEXT: br label [[COND_END:%.*]] +// CHECK13: cond.false: +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: br label [[COND_END]] +// CHECK13: cond.end: +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK13: omp.inner.for.cond: +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: 
[[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK13: omp.inner.for.cond.cleanup: +// CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] +// CHECK13: omp.inner.for.body: +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK13: omp.inner.for.inc: +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK13: omp.inner.for.end: +// CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK13: omp.loop.exit: +// CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]]) +// CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]] +// CHECK13-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 +// CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK13: arraydestroy.body: +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], 
[[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] +// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK13: arraydestroy.done3: +// CHECK13-NEXT: ret void +// +// +// CHECK13-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev +// CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 { +// CHECK13-NEXT: entry: +// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK13-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK13-NEXT: call void @_ZN1SIfEC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK13-NEXT: ret void +// +// +// CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined.omp_outlined +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +// CHECK13-NEXT: entry: +// CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: 
[[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK13-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] +// CHECK13: arrayctor.loop: +// CHECK13-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK13-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] +// 
CHECK13-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK13-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK13-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] +// CHECK13: arrayctor.cont: +// CHECK13-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK13: cond.true: +// CHECK13-NEXT: br label [[COND_END:%.*]] +// CHECK13: cond.false: +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: br label [[COND_END]] +// CHECK13: cond.end: +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK13: omp.inner.for.cond: +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK13: omp.inner.for.cond.cleanup: +// CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] +// 
CHECK13: omp.inner.for.body: +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK13-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM3]] +// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i64 4, i1 false) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK13-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK13-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4 +// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK13: omp.body.continue: +// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK13: omp.inner.for.inc: +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK13-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK13: omp.inner.for.end: +// CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK13: omp.loop.exit: +// CHECK13-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]]) 
+// CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK13: arraydestroy.body: +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK13: arraydestroy.done8: +// CHECK13-NEXT: ret void +// +// +// CHECK13-LABEL: define {{[^@]+}}@_ZN1SIfED1Ev +// CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK13-NEXT: entry: +// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK13-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK13-NEXT: call void @_ZN1SIfED2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR5]] +// CHECK13-NEXT: ret void +// +// +// CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80 +// CHECK13-SAME: () #[[ATTR0]] { +// CHECK13-NEXT: entry: +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined) +// CHECK13-NEXT: ret void +// +// +// CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK13-NEXT: entry: +// CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK13-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store ptr undef, ptr [[_TMP1]], align 8 +// CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], 
i64 2 +// CHECK13-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] +// CHECK13: arrayctor.loop: +// CHECK13-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK13-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] +// CHECK13-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK13-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK13-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] +// CHECK13: arrayctor.cont: +// CHECK13-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK13-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK13: cond.true: +// CHECK13-NEXT: br label [[COND_END:%.*]] +// CHECK13: cond.false: +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: br label [[COND_END]] +// CHECK13: cond.end: +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK13: 
omp.inner.for.cond: +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK13: omp.inner.for.cond.cleanup: +// CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] +// CHECK13: omp.inner.for.body: +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK13: omp.inner.for.inc: +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK13: omp.inner.for.end: +// CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK13: omp.loop.exit: +// CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP14]]) +// CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr 
[[ARRAY_BEGIN4]], i64 2 +// CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK13: arraydestroy.body: +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK13: arraydestroy.done5: +// CHECK13-NEXT: ret void +// +// +// CHECK13-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev +// CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK13-NEXT: entry: +// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK13-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK13-NEXT: call void @_ZN1SIiEC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK13-NEXT: ret void +// +// +// CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined.omp_outlined +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +// CHECK13-NEXT: entry: +// CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// 
CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK13-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store ptr undef, ptr [[_TMP1]], align 8 +// CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: br label 
[[ARRAYCTOR_LOOP:%.*]] +// CHECK13: arrayctor.loop: +// CHECK13-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK13-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] +// CHECK13-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK13-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK13-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] +// CHECK13: arrayctor.cont: +// CHECK13-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK13-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK13: cond.true: +// CHECK13-NEXT: br label [[COND_END:%.*]] +// CHECK13: cond.false: +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: br label [[COND_END]] +// CHECK13: cond.end: +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK13: omp.inner.for.cond: +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK13: omp.inner.for.cond.cleanup: +// CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] +// CHECK13: omp.inner.for.body: +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK13-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP12]], i64 4, i1 false) +// CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK13: omp.body.continue: +// CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK13: omp.inner.for.inc: +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK13: omp.inner.for.end: +// CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK13: omp.loop.exit: +// CHECK13-NEXT: [[TMP15:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]]) +// CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK13: arraydestroy.body: +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK13: arraydestroy.done9: +// CHECK13-NEXT: ret void +// +// +// CHECK13-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev +// CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK13-NEXT: entry: +// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK13-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK13-NEXT: call void @_ZN1SIiED2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR5]] +// CHECK13-NEXT: ret void +// +// +// CHECK13-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev +// CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK13-NEXT: entry: 
+// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK13-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK13-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK13-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float +// CHECK13-NEXT: store float [[CONV]], ptr [[F]], align 4 +// CHECK13-NEXT: ret void +// +// +// CHECK13-LABEL: define {{[^@]+}}@_ZN1SIfED2Ev +// CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK13-NEXT: entry: +// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK13-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK13-NEXT: ret void +// +// +// CHECK13-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev +// CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK13-NEXT: entry: +// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK13-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK13-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK13-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 +// CHECK13-NEXT: ret void +// +// +// CHECK13-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev +// CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK13-NEXT: entry: +// CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK13-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK13-NEXT: 
ret void +// +// +// CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124 +// CHECK15-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK15-NEXT: entry: +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined) +// CHECK15-NEXT: ret void +// +// +// CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK15-NEXT: entry: +// CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK15-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// 
CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] +// CHECK15: arrayctor.loop: +// CHECK15-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK15-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4:[0-9]+]] +// CHECK15-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK15-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK15-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] +// CHECK15: arrayctor.cont: +// CHECK15-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK15: cond.true: +// CHECK15-NEXT: br label [[COND_END:%.*]] +// CHECK15: cond.false: +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: br label [[COND_END]] +// CHECK15: cond.end: +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] +// CHECK15: omp.inner.for.cond: +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK15: omp.inner.for.cond.cleanup: +// CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] +// CHECK15: omp.inner.for.body: +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined.omp_outlined, i32 [[TMP7]], i32 [[TMP8]]) +// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK15: omp.inner.for.inc: +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK15: omp.inner.for.end: +// CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK15: omp.loop.exit: +// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]]) +// CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]] +// CHECK15-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2 +// CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// 
CHECK15: arraydestroy.body: +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] +// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK15: arraydestroy.done3: +// CHECK15-NEXT: ret void +// +// +// CHECK15-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev +// CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 { +// CHECK15-NEXT: entry: +// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK15-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK15-NEXT: call void @_ZN1SIfEC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK15-NEXT: ret void +// +// +// CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined.omp_outlined +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +// CHECK15-NEXT: entry: +// CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_LB:%.*]] = 
alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK15-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] +// CHECK15: arrayctor.loop: +// CHECK15-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK15-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] +// CHECK15-NEXT: 
[[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK15-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK15-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] +// CHECK15: arrayctor.cont: +// CHECK15-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK15: cond.true: +// CHECK15-NEXT: br label [[COND_END:%.*]] +// CHECK15: cond.false: +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: br label [[COND_END]] +// CHECK15: cond.end: +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK15: omp.inner.for.cond: +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK15: omp.inner.for.cond.cleanup: +// CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] +// CHECK15: 
omp.inner.for.body: +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false) +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK15-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4 +// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK15: omp.body.continue: +// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK15: omp.inner.for.inc: +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK15-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK15: omp.inner.for.end: +// CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK15: omp.loop.exit: +// CHECK15-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]]) +// CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK15-NEXT: 
[[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 +// CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK15: arraydestroy.body: +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] +// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE6:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK15: arraydestroy.done6: +// CHECK15-NEXT: ret void +// +// +// CHECK15-LABEL: define {{[^@]+}}@_ZN1SIfED1Ev +// CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK15-NEXT: entry: +// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK15-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK15-NEXT: call void @_ZN1SIfED2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR5]] +// CHECK15-NEXT: ret void +// +// +// CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80 +// CHECK15-SAME: () #[[ATTR0]] { +// CHECK15-NEXT: entry: +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined) +// CHECK15-NEXT: ret void +// +// +// CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK15-NEXT: entry: +// CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK15-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK15-NEXT: store ptr undef, ptr [[_TMP1]], align 4 +// CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], 
i32 2 +// CHECK15-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] +// CHECK15: arrayctor.loop: +// CHECK15-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK15-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] +// CHECK15-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK15-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK15-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] +// CHECK15: arrayctor.cont: +// CHECK15-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK15-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK15: cond.true: +// CHECK15-NEXT: br label [[COND_END:%.*]] +// CHECK15: cond.false: +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: br label [[COND_END]] +// CHECK15: cond.end: +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK15: 
omp.inner.for.cond: +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK15-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK15: omp.inner.for.cond.cleanup: +// CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] +// CHECK15: omp.inner.for.body: +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined.omp_outlined, i32 [[TMP7]], i32 [[TMP8]]) +// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK15: omp.inner.for.inc: +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK15: omp.inner.for.end: +// CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK15: omp.loop.exit: +// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP12]]) +// CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i32 2 +// CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK15: arraydestroy.body: +// CHECK15-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK15: arraydestroy.done5: +// CHECK15-NEXT: ret void +// +// +// CHECK15-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev +// CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK15-NEXT: entry: +// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK15-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK15-NEXT: call void @_ZN1SIiEC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK15-NEXT: ret void +// +// +// CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined.omp_outlined +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +// CHECK15-NEXT: entry: +// CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: 
[[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK15-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: store ptr undef, ptr [[_TMP1]], align 4 +// CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] +// CHECK15: arrayctor.loop: +// CHECK15-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK15-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull 
align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] +// CHECK15-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK15-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK15-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] +// CHECK15: arrayctor.cont: +// CHECK15-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK15-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK15: cond.true: +// CHECK15-NEXT: br label [[COND_END:%.*]] +// CHECK15: cond.false: +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: br label [[COND_END]] +// CHECK15: cond.end: +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK15: omp.inner.for.cond: +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK15-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK15: omp.inner.for.cond.cleanup: +// CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] +// CHECK15: omp.inner.for.body: +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP13]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP12]], i32 4, i1 false) +// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK15: omp.body.continue: +// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK15: omp.inner.for.inc: +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK15-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK15: omp.inner.for.end: +// CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK15: omp.loop.exit: +// CHECK15-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]]) +// CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x 
%struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK15: arraydestroy.body: +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK15: arraydestroy.done7: +// CHECK15-NEXT: ret void +// +// +// CHECK15-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev +// CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK15-NEXT: entry: +// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK15-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK15-NEXT: call void @_ZN1SIiED2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR5]] +// CHECK15-NEXT: ret void +// +// +// CHECK15-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev +// CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK15-NEXT: entry: +// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK15-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK15-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 
+// CHECK15-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float +// CHECK15-NEXT: store float [[CONV]], ptr [[F]], align 4 +// CHECK15-NEXT: ret void +// +// +// CHECK15-LABEL: define {{[^@]+}}@_ZN1SIfED2Ev +// CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK15-NEXT: entry: +// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK15-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK15-NEXT: ret void +// +// +// CHECK15-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev +// CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK15-NEXT: entry: +// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK15-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK15-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 +// CHECK15-NEXT: ret void +// +// +// CHECK15-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev +// CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK15-NEXT: entry: +// CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK15-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK15-NEXT: ret void +// +// +// CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104 +// CHECK17-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK17-NEXT: entry: +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.omp_outlined) +// CHECK17-NEXT: ret void +// +// +// CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.omp_outlined +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK17-NEXT: entry: +// CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK17-NEXT: store ptr undef, ptr [[_TMP1]], align 8 +// CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK17: cond.true: +// CHECK17-NEXT: br label [[COND_END:%.*]] +// CHECK17: cond.false: +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: br label [[COND_END]] +// CHECK17: cond.end: +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK17: omp.inner.for.cond: +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK17-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17: omp.inner.for.body: +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK17: omp.inner.for.inc: +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK17: omp.inner.for.end: +// CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK17: omp.loop.exit: +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK17-NEXT: ret void +// +// +// CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.omp_outlined.omp_outlined +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +// CHECK17-NEXT: entry: +// CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[SIVAR:%.*]] = 
alloca i32, align 4 +// CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 8 +// CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK17-NEXT: store ptr undef, ptr [[_TMP1]], align 8 +// CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK17: cond.true: +// CHECK17-NEXT: br label [[COND_END:%.*]] +// CHECK17: cond.false: +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: br label 
[[COND_END]] +// CHECK17: cond.end: +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK17: omp.inner.for.cond: +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK17-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17: omp.inner.for.body: +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK17-NEXT: store i32 1, ptr [[G]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK17-NEXT: store volatile i32 1, ptr [[TMP10]], align 4 +// CHECK17-NEXT: store i32 2, ptr [[SIVAR]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[G]], ptr [[TMP11]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK17-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 8 +// CHECK17-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR3:[0-9]+]] +// CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK17: 
omp.body.continue: +// CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK17: omp.inner.for.inc: +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK17: omp.inner.for.end: +// CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK17: omp.loop.exit: +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK17-NEXT: ret void +// diff --git a/clang/test/OpenMP/target_teams_generic_loop_reduction_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_reduction_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/target_teams_generic_loop_reduction_codegen.cpp @@ -0,0 +1,1510 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK1 +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK3 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 
-DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK3 + +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK5 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK5 + +// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" + +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 
-triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +template <typename T> +T tmain() { + T t_var = T(); + T vec[] = {1, 2}; +#pragma omp target teams distribute parallel for reduction(+: t_var) + for (int i = 0; i < 2; ++i) { + t_var += (T) i; + } + return T(); +} + +int main() { + static int sivar; +#ifdef LAMBDA + + [&]() { +#pragma omp target teams distribute parallel for reduction(+: sivar) + for (int i = 0; i < 2; ++i) { + + // Skip global and bound tid vars + + + + // Skip global and bound tid vars, and prev lb and ub vars + // skip loop vars + + + sivar += i; + + [&]() { + + sivar += 4; + + }(); + } + }(); + return 0; +#else +#pragma omp target teams distribute parallel for reduction(+: sivar) + for (int i = 0; i < 2; ++i) { + sivar += i; + } + return tmain<int>(); +#endif +} + + + + +// Skip global and bound tid vars + + +// Skip global and bound tid vars, and prev lb and ub +// skip loop vars + + + + +// Skip global and bound tid vars + + +// Skip global and bound tid vars, and prev lb and ub vars +// skip loop vars + +#endif +// CHECK1-LABEL: define {{[^@]+}}@main +// CHECK1-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT:
[[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 2, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], 
align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 2, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP15]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, ptr [[TMP17]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK1-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1: omp_offload.failed: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66(ptr @_ZZ4mainE5sivar) #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK1: omp_offload.cont: +// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiET_v() +// CHECK1-NEXT: ret i32 [[CALL]] +// +// 
+// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66 +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.omp_outlined, ptr [[TMP0]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// 
CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 
[[TMP10]] to i64 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], ptr [[SIVAR1]]) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// CHECK1-NEXT: ] +// CHECK1: .omp.reduction.case1: +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK1: 
.omp.reduction.case2: +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK1: .omp.reduction.default: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.omp_outlined.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, 
ptr [[SIVAR_ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[SIVAR2]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], 
align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR2]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[SIVAR2]], align 4 +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[SIVAR2]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// CHECK1-NEXT: ] +// CHECK1: 
.omp.reduction.case1: +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR2]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK1: .omp.reduction.case2: +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR2]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK1: .omp.reduction.default: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.omp_outlined.omp_outlined.omp.reduction.reduction_func +// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: ret void +// +// 
+// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.omp_outlined.omp.reduction.reduction_func +// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_Z5tmainIiET_v +// CHECK1-SAME: () #[[ATTR5:[0-9]+]] comdat { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) +// CHECK1-NEXT: 
[[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 2, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.1, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: 
[[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 2, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP15]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, ptr [[TMP17]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK1-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1: omp_offload.failed: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(ptr [[T_VAR]]) #[[ATTR2]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK1: omp_offload.cont: +// CHECK1-NEXT: ret i32 0 +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32 +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: 
store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined, ptr [[TMP0]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: 
[[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], ptr [[T_VAR1]]) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// CHECK1-NEXT: ] +// CHECK1: .omp.reduction.case1: +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK1: .omp.reduction.case2: +// CHECK1-NEXT: [[TMP18:%.*]] = load 
i32, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK1: .omp.reduction.default: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 +// CHECK1-NEXT: store 
i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR2]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], 
[[TMP9]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[T_VAR2]], align 4 +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR2]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// CHECK1-NEXT: ] +// CHECK1: .omp.reduction.case1: +// CHECK1-NEXT: [[TMP16:%.*]] = load 
i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK1: .omp.reduction.case2: +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK1: .omp.reduction.default: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined.omp.reduction.reduction_func +// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp.reduction.reduction_func +// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK1-SAME: () #[[ATTR7:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK1-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@main +// CHECK3-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// CHECK3-NEXT: 
[[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 2, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// 
CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 2, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK3-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK3-NEXT: store i32 0, ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK3-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3: omp_offload.failed: +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66(ptr @_ZZ4mainE5sivar) #[[ATTR2:[0-9]+]] +// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK3: omp_offload.cont: +// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() +// CHECK3-NEXT: ret i32 [[CALL]] +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66 +// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { +// 
CHECK3-NEXT: entry: +// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.omp_outlined, ptr [[TMP0]]) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// 
CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.omp_outlined.omp_outlined, i32 [[TMP8]], i32 [[TMP9]], ptr [[SIVAR1]]) +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// CHECK3-NEXT: ] +// CHECK3: .omp.reduction.case1: +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK3: .omp.reduction.case2: +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[SIVAR1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 +// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK3: .omp.reduction.default: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.omp_outlined.omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 0, ptr 
[[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: 
omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK3: omp.body.continue: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// CHECK3-NEXT: ] +// CHECK3: .omp.reduction.case1: +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add 
nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK3: .omp.reduction.case2: +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK3: .omp.reduction.default: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.omp_outlined.omp_outlined.omp.reduction.reduction_func +// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.omp_outlined.omp.reduction.reduction_func +// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef 
[[TMP1:%.*]]) #[[ATTR3]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_Z5tmainIiET_v +// CHECK3-SAME: () #[[ATTR5:[0-9]+]] comdat { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 2, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.1, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: 
[[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 2, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK3-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK3-NEXT: store i32 0, ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK3-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3: omp_offload.failed: +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(ptr [[T_VAR]]) #[[ATTR2]] +// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK3: omp_offload.cont: +// CHECK3-NEXT: ret i32 0 +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32 +// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined, ptr [[TMP0]]) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined, i32 [[TMP8]], i32 [[TMP9]], ptr [[T_VAR1]]) +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// CHECK3-NEXT: ] +// CHECK3: .omp.reduction.case1: +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK3: .omp.reduction.case2: +// CHECK3-NEXT: [[TMP16:%.*]] = load 
i32, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 +// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK3: .omp.reduction.default: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store 
i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// 
CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK3: omp.body.continue: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// CHECK3-NEXT: ] +// CHECK3: .omp.reduction.case1: +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: 
[[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK3: .omp.reduction.case2: +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK3: .omp.reduction.default: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined.omp.reduction.reduction_func +// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp.reduction.reduction_func +// CHECK3-SAME: (ptr noundef 
[[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK3-SAME: () #[[ATTR7:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK3-NEXT: ret void +// +// +// CHECK5-LABEL: define {{[^@]+}}@main +// CHECK5-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 1 +// CHECK5-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// CHECK5-NEXT: call void @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 1 dereferenceable(1) [[REF_TMP]]) +// CHECK5-NEXT: ret i32 0 +// +// +// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l44 +// CHECK5-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: store ptr [[SIVAR]], 
ptr [[SIVAR_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l44.omp_outlined, ptr [[TMP0]]) +// CHECK5-NEXT: ret void +// +// +// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l44.omp_outlined +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 +// CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK5-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK5: cond.true: +// CHECK5-NEXT: br label [[COND_END:%.*]] +// CHECK5: cond.false: +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: br label [[COND_END]] +// CHECK5: cond.end: +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK5: omp.inner.for.cond: +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5: omp.inner.for.body: +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l44.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], ptr [[SIVAR1]]) +// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK5: omp.inner.for.inc: +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK5: omp.inner.for.end: +// CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK5: omp.loop.exit: +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK5-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l44.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// CHECK5-NEXT: ] +// CHECK5: .omp.reduction.case1: +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK5-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 +// CHECK5-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK5: .omp.reduction.case2: +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[SIVAR1]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK5: .omp.reduction.default: +// CHECK5-NEXT: ret void +// +// +// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l44.omp_outlined.omp_outlined +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[SIVAR2:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 +// CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, 
ptr [[SIVAR_ADDR]], align 8 +// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: store i32 0, ptr [[SIVAR2]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK5: cond.true: +// CHECK5-NEXT: br label [[COND_END:%.*]] +// CHECK5: cond.false: +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: br label [[COND_END]] +// CHECK5: cond.end: +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK5: omp.inner.for.cond: +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], 
align 4 +// CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5: omp.inner.for.body: +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR2]], align 4 +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK5-NEXT: store i32 [[ADD4]], ptr [[SIVAR2]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[SIVAR2]], ptr [[TMP13]], align 8 +// CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]) +// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK5: omp.body.continue: +// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK5: omp.inner.for.inc: +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK5-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK5: omp.inner.for.end: +// CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK5: omp.loop.exit: +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK5-NEXT: store ptr [[SIVAR2]], ptr [[TMP15]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l44.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// CHECK5-NEXT: ] +// CHECK5: .omp.reduction.case1: +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR2]], align 4 +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK5-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 +// CHECK5-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK5: .omp.reduction.case2: +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR2]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP19]] monotonic, align 4 +// CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK5: .omp.reduction.default: +// CHECK5-NEXT: ret void +// +// +// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l44.omp_outlined.omp_outlined.omp.reduction.reduction_func +// CHECK5-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 +// CHECK5-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x 
ptr], ptr [[TMP2]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK5-NEXT: ret void +// +// +// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l44.omp_outlined.omp.reduction.reduction_func +// CHECK5-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR4]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 +// CHECK5-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK5-NEXT: ret void +// +// +// CHECK5-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK5-SAME: () #[[ATTR6:[0-9]+]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK5-NEXT: ret void +// diff --git a/clang/test/OpenMP/target_teams_generic_loop_uses_allocators_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_uses_allocators_codegen.cpp new file mode 100644 --- /dev/null +++ 
b/clang/test/OpenMP/target_teams_generic_loop_uses_allocators_codegen.cpp @@ -0,0 +1,482 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ +// Test host codegen. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +enum omp_allocator_handle_t { + omp_null_allocator = 0, + omp_default_mem_alloc = 1, + omp_large_cap_mem_alloc = 2, + omp_const_mem_alloc = 3, + omp_high_bw_mem_alloc = 4, + omp_low_lat_mem_alloc = 5, + omp_cgroup_mem_alloc = 6, + omp_pteam_mem_alloc = 7, + omp_thread_mem_alloc = 8, + KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__ +}; + +typedef enum omp_alloctrait_key_t { omp_atk_sync_hint = 1, + omp_atk_alignment = 2, + omp_atk_access = 3, + omp_atk_pool_size = 4, + omp_atk_fallback = 5, + omp_atk_fb_data = 6, + omp_atk_pinned = 7, + omp_atk_partition = 8 +} omp_alloctrait_key_t; +typedef enum omp_alloctrait_value_t { + omp_atv_false = 0, + omp_atv_true = 1, + omp_atv_default = 2, + omp_atv_contended = 3, + omp_atv_uncontended = 4, + omp_atv_sequential = 5, + omp_atv_private = 6, + omp_atv_all = 7, + omp_atv_thread = 8, + omp_atv_pteam = 9, + omp_atv_cgroup = 10, + omp_atv_default_mem_fb = 11, + omp_atv_null_fb = 12, + omp_atv_abort_fb = 13, + omp_atv_allocator_fb = 14, 
+ omp_atv_environment = 15, + omp_atv_nearest = 16, + omp_atv_blocked = 17, + omp_atv_interleaved = 18 +} omp_alloctrait_value_t; + +typedef struct omp_alloctrait_t { + omp_alloctrait_key_t key; + __UINTPTR_TYPE__ value; +} omp_alloctrait_t; + +// Just map the traits variable as a firstprivate variable. + +void foo() { + omp_alloctrait_t traits[10]; + omp_allocator_handle_t my_allocator; + +#pragma omp target teams loop uses_allocators(omp_null_allocator, omp_thread_mem_alloc, my_allocator(traits)) + for (int i = 0; i < 10; ++i) + ; +} + + +// Destroy allocator upon exit from the region. + +#endif +// CHECK-64-LABEL: define {{[^@]+}}@_Z3foov +// CHECK-64-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[TRAITS:%.*]] = alloca [10 x %struct.omp_alloctrait_t], align 8 +// CHECK-64-NEXT: [[MY_ALLOCATOR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-64-NEXT: store ptr [[TRAITS]], ptr [[TMP0]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-64-NEXT: store ptr [[TRAITS]], ptr [[TMP1]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK-64-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK-64-NEXT: store i32 2, ptr [[TMP5]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK-64-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK-64-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK-64-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK-64-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8 +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK-64-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK-64-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK-64-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK-64-NEXT: store i64 10, ptr [[TMP13]], align 8 +// CHECK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK-64-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CHECK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK-64-NEXT: store [3 x i32] 
zeroinitializer, ptr [[TMP15]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK-64-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK-64-NEXT: store i32 0, ptr [[TMP17]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73.region_id, ptr [[KERNEL_ARGS]]) +// CHECK-64-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK-64-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK-64: omp_offload.failed: +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73(ptr [[TRAITS]]) #[[ATTR2:[0-9]+]] +// CHECK-64-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK-64: omp_offload.cont: +// CHECK-64-NEXT: ret void +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73 +// CHECK-64-SAME: (ptr noundef nonnull align 8 dereferenceable(160) [[TRAITS:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[TRAITS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[MY_ALLOCATOR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: store ptr [[TRAITS]], ptr [[TRAITS_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TRAITS_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_init_allocator(i32 [[TMP0]], ptr null, i32 10, ptr [[TMP2]]) +// CHECK-64-NEXT: [[CONV:%.*]] = ptrtoint ptr [[TMP3]] to i64 +// CHECK-64-NEXT: store i64 [[CONV]], ptr [[MY_ALLOCATOR]], align 8 +// CHECK-64-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[MY_ALLOCATOR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = inttoptr i64 [[TMP4]] to ptr +// CHECK-64-NEXT: call void @__kmpc_destroy_allocator(i32 [[TMP0]], ptr [[CONV1]]) +// CHECK-64-NEXT: ret void +// CHECK-64-LABEL: define {{[^@]+}}@.omp_outlined. +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: 
br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-64-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3:[0-9]+]], i32 [[TMP1]]) +// CHECK-64-NEXT: ret void +// CHECK-64-LABEL: define {{[^@]+}}@.omp_outlined..1 +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: store i64 
[[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64: cond.true: +// CHECK-64-NEXT: br label [[COND_END:%.*]] +// CHECK-64: cond.false: +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END]] +// CHECK-64: cond.end: +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-64: omp.inner.for.cond: +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64: omp.inner.for.body: +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-64: omp.body.continue: +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-64: omp.inner.for.inc: +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-64: omp.inner.for.end: +// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-64: omp.loop.exit: +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]]) +// CHECK-64-NEXT: ret void +// CHECK-64-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK-64-SAME: () #[[ATTR3:[0-9]+]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK-64-NEXT: ret void +// CHECK-LABEL: define {{[^@]+}}@_Z3foov +// CHECK-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TRAITS:%.*]] = alloca [10 x %struct.omp_alloctrait_t], align 8 +// CHECK-NEXT: [[MY_ALLOCATOR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 +// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds 
[1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[TRAITS]], ptr [[TMP0]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[TRAITS]], ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK-NEXT: store i32 2, ptr [[TMP5]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK-NEXT: store i64 10, ptr [[TMP13]], align 8 +// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP15]], align 4 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4 +// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK-NEXT: store i32 0, ptr [[TMP17]], align 4 +// CHECK-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66.region_id, ptr [[KERNEL_ARGS]]) +// CHECK-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK: omp_offload.failed: +// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66(ptr [[TRAITS]]) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK: omp_offload.cont: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66 +// CHECK-SAME: (ptr noundef nonnull align 8 dereferenceable(160) [[TRAITS:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TRAITS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[MY_ALLOCATOR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: 
[[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: store ptr [[TRAITS]], ptr [[TRAITS_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TRAITS_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_init_allocator(i32 [[TMP0]], ptr null, i32 10, ptr [[TMP1]]) +// CHECK-NEXT: [[CONV:%.*]] = ptrtoint ptr [[TMP2]] to i64 +// CHECK-NEXT: store i64 [[CONV]], ptr [[MY_ALLOCATOR]], align 8 +// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66.omp_outlined) +// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[MY_ALLOCATOR]], align 8 +// CHECK-NEXT: [[CONV1:%.*]] = inttoptr i64 [[TMP3]] to ptr +// CHECK-NEXT: call void @__kmpc_destroy_allocator(i32 [[TMP0]], ptr [[CONV1]]) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66.omp_outlined +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK: cond.true: +// CHECK-NEXT: br label [[COND_END:%.*]] +// CHECK: cond.false: +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-NEXT: br label [[COND_END]] +// CHECK: cond.end: +// CHECK-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK: omp.inner.for.cond: +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK: omp.inner.for.body: +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK: omp.inner.for.inc: +// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK: omp.inner.for.end: +// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK: omp.loop.exit: +// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3:[0-9]+]], i32 [[TMP1]]) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66.omp_outlined.omp_outlined +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 
8 +// CHECK-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK: cond.true: +// CHECK-NEXT: br label [[COND_END:%.*]] +// CHECK: cond.false: +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: br label [[COND_END]] +// CHECK: cond.end: +// CHECK-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK: omp.inner.for.cond: +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[CMP2:%.*]] 
= icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK: omp.inner.for.body: +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK: omp.body.continue: +// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK: omp.inner.for.inc: +// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK: omp.inner.for.end: +// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK: omp.loop.exit: +// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]]) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK-SAME: () #[[ATTR3:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK-NEXT: ret void +// diff --git a/clang/test/OpenMP/teams_generic_loop_codegen-1.cpp b/clang/test/OpenMP/teams_generic_loop_codegen-1.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/teams_generic_loop_codegen-1.cpp @@ -0,0 +1,3545 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ +// expected-no-diagnostics +#ifndef HEADER +#define HEADER +// Test host codegen. 
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK1 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK3 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK3 + +// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: 
%clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +#ifdef CK1 + +int a[100]; + +int teams_argument_global(int n){ + int te = n / 128; + int th = 128; + // discard n_addr + + #pragma omp target + #pragma omp teams loop num_teams(te), thread_limit(th) + for(int i = 0; i < n; i++) { + a[i] = 0; + } + + #pragma omp target + {{{ + #pragma omp teams loop + for(int i = 0; i < n; i++) { + a[i] = 0; + } + }}} + + // outlined target regions + + + + + return a[0]; +} + +#endif // CK1 + +// Test host codegen. +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK9 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK9 +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK11 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK11 + +// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple 
powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +#ifdef CK2 + +int teams_local_arg(void) { + int n = 100; + int a[n]; + + #pragma omp target + #pragma omp teams loop + for(int i = 0; i < n; i++) { + a[i] = 0; + } + + // outlined target region + + + return a[0]; +} +#endif // CK2 + +// Test host codegen. 
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK17 +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK17 +// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK19 +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK19 + +// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: 
%clang_cc1 -DCK3 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +#ifdef CK3 + + +template +struct SS{ + T a[X]; + float b; + int foo(void) { + + #pragma omp target + #pragma omp teams loop + for(int i = 0; i < X; i++) { + a[i] = (T)0; + } + + // outlined target region + + + return a[0]; + } +}; + +int teams_template_struct(void) { + SS V; + return V.foo(); + +} +#endif // CK3 + +// Test host codegen. +// RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK25 +// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK25 +// RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK27 +// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK4 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK27 + +// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK4 
-fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" + +#ifdef CK4 + +template +int tmain(T argc) { + T a[n]; + int te = n/128; + int th = 128; +#pragma omp target +#pragma omp teams loop num_teams(te) thread_limit(th) + for(int i = 0; i < n; i++) { + a[i] = (T)0; + } + return 0; +} + +int main (int argc, char **argv) { + int n = 100; + int a[n]; +#pragma omp target +#pragma omp teams loop + for(int i = 0; i < n; i++) { + a[i] = 0; + } + return tmain(argc); +} + + + + + + + +#endif // CK4 +#endif +// CHECK1-LABEL: define {{[^@]+}}@_Z21teams_argument_globali +// CHECK1-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TH:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TE_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[TH_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = 
alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK1-NEXT: [[N_CASTED4:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [2 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_PTRS6:%.*]] = alloca [2 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS7:%.*]] = alloca [2 x ptr], align 8 +// CHECK1-NEXT: [[_TMP8:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP0]], 128 +// CHECK1-NEXT: store i32 [[DIV]], ptr [[TE]], align 4 +// CHECK1-NEXT: store i32 128, ptr [[TH]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TE]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[TE_CASTED]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TE_CASTED]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[TH_CASTED]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TH_CASTED]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[N_CASTED]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr 
[[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP6]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP6]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @a, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @a, ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TE]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// 
CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK1-NEXT: [[DIV2:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV2]], 1 +// CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 +// CHECK1-NEXT: [[TMP26:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP21]], 0 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 2, ptr [[TMP27]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 4, ptr [[TMP28]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP19]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP20]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], 
i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] [[TMP26]], ptr [[TMP37]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP38]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, ptr [[TMP39]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 [[TMP21]], i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK1-NEXT: br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1: omp_offload.failed: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28(i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], ptr @a) #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK1: omp_offload.cont: +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP42]], ptr [[N_CASTED4]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i64, ptr [[N_CASTED4]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0 +// CHECK1-NEXT: store 
i64 [[TMP43]], ptr [[TMP44]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP43]], ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP46]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1 +// CHECK1-NEXT: store ptr @a, ptr [[TMP47]], align 8 +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 1 +// CHECK1-NEXT: store ptr @a, ptr [[TMP48]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP49]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP52]], ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK1-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP53]], 0 +// CHECK1-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1 +// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[DIV12]], 1 +// CHECK1-NEXT: store i32 [[SUB13]], ptr [[DOTCAPTURE_EXPR_10]], align 4 +// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP54]], 1 +// CHECK1-NEXT: [[TMP55:%.*]] = zext i32 [[ADD14]] to i64 +// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 +// CHECK1-NEXT: store i32 2, ptr [[TMP56]], align 4 +// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 +// CHECK1-NEXT: store i32 2, ptr [[TMP57]], align 4 +// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP50]], ptr [[TMP58]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP51]], ptr [[TMP59]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.1, ptr [[TMP60]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP61]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP62]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP63]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 8 +// CHECK1-NEXT: store i64 [[TMP55]], ptr [[TMP64]], align 8 +// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, ptr [[TMP65]], align 8 +// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP66]], align 4 +// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] 
zeroinitializer, ptr [[TMP67]], align 4 +// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, ptr [[TMP68]], align 4 +// CHECK1-NEXT: [[TMP69:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.region_id, ptr [[KERNEL_ARGS15]]) +// CHECK1-NEXT: [[TMP70:%.*]] = icmp ne i32 [[TMP69]], 0 +// CHECK1-NEXT: br i1 [[TMP70]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]] +// CHECK1: omp_offload.failed16: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34(i64 [[TMP43]], ptr @a) #[[ATTR2]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT17]] +// CHECK1: omp_offload.cont17: +// CHECK1-NEXT: [[TMP71:%.*]] = load i32, ptr @a, align 4 +// CHECK1-NEXT: ret i32 [[TMP71]] +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28 +// CHECK1-SAME: (i64 noundef [[TE:%.*]], i64 noundef [[TH:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[TE_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[TH_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) +// CHECK1-NEXT: store i64 [[TE]], ptr [[TE_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[TH]], ptr [[TH_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 +// CHECK1-NEXT: 
call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.omp_outlined, ptr [[N_ADDR]], ptr [[TMP1]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr 
[[TMP0]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK1: omp.precond.then: +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// 
CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.omp_outlined.omp_outlined, i64 [[TMP16]], i64 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP22]]) +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.end: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.omp_outlined.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, 
align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK1: omp.precond.then: +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], 
align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]]) +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.end: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34 +// CHECK1-SAME: (i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK1-NEXT: 
store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.omp_outlined, ptr [[N_ADDR]], ptr [[TMP0]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] 
= load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK1: omp.precond.then: +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// 
CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.omp_outlined.omp_outlined, i64 [[TMP16]], i64 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]]) +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.end: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.omp_outlined.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 
4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK1: omp.precond.then: +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 
+// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: 
[[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]]) +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.end: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK1-SAME: () #[[ATTR3:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK1-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_Z21teams_argument_globali +// CHECK3-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: 
[[TE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TH:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TE_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TH_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK3-NEXT: [[N_CASTED4:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [2 x ptr], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_PTRS6:%.*]] = alloca [2 x ptr], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS7:%.*]] = alloca [2 x ptr], align 4 +// CHECK3-NEXT: [[_TMP8:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[KERNEL_ARGS15:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP0]], 128 +// CHECK3-NEXT: store i32 [[DIV]], ptr [[TE]], align 4 +// CHECK3-NEXT: store i32 128, ptr [[TH]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TE]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TE_CASTED]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_CASTED]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH]], align 4 +// CHECK3-NEXT: store i32 [[TMP3]], ptr [[TH_CASTED]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TH_CASTED]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = 
load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr null, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr @a, ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr @a, ptr [[TMP17]], align 4 +// CHECK3-NEXT: 
[[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr null, ptr [[TMP18]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TE]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK3-NEXT: [[DIV2:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV2]], 1 +// CHECK3-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK3-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 +// CHECK3-NEXT: [[TMP26:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP21]], 0 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 2, ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 4, ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP19]], ptr [[TMP29]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP20]], ptr [[TMP30]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 
0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP31]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP32]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP33]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP34]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 [[TMP25]], ptr [[TMP35]], align 8 +// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK3-NEXT: store i64 0, ptr [[TMP36]], align 8 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK3-NEXT: store [3 x i32] [[TMP26]], ptr [[TMP37]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP38]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK3-NEXT: store i32 0, ptr [[TMP39]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 [[TMP21]], i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK3-NEXT: br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3: omp_offload.failed: +// CHECK3-NEXT: 
call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28(i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], ptr @a) #[[ATTR2:[0-9]+]] +// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK3: omp_offload.cont: +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP42]], ptr [[N_CASTED4]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[N_CASTED4]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP43]], ptr [[TMP44]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP43]], ptr [[TMP45]], align 4 +// CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS7]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP46]], align 4 +// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 1 +// CHECK3-NEXT: store ptr @a, ptr [[TMP47]], align 4 +// CHECK3-NEXT: [[TMP48:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 1 +// CHECK3-NEXT: store ptr @a, ptr [[TMP48]], align 4 +// CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS7]], i32 0, i32 1 +// CHECK3-NEXT: store ptr null, ptr [[TMP49]], align 4 +// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP52:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP52]], ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK3-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_9]], align 4 +// CHECK3-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP53]], 0 +// CHECK3-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1 +// CHECK3-NEXT: [[SUB13:%.*]] 
= sub nsw i32 [[DIV12]], 1 +// CHECK3-NEXT: store i32 [[SUB13]], ptr [[DOTCAPTURE_EXPR_10]], align 4 +// CHECK3-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_10]], align 4 +// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP54]], 1 +// CHECK3-NEXT: [[TMP55:%.*]] = zext i32 [[ADD14]] to i64 +// CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 +// CHECK3-NEXT: store i32 2, ptr [[TMP56]], align 4 +// CHECK3-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 +// CHECK3-NEXT: store i32 2, ptr [[TMP57]], align 4 +// CHECK3-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP50]], ptr [[TMP58]], align 4 +// CHECK3-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP51]], ptr [[TMP59]], align 4 +// CHECK3-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.1, ptr [[TMP60]], align 4 +// CHECK3-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP61]], align 4 +// CHECK3-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP62]], align 4 +// CHECK3-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP63]], align 4 +// CHECK3-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 8 +// CHECK3-NEXT: store i64 [[TMP55]], ptr [[TMP64]], align 8 +// CHECK3-NEXT: 
[[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 9 +// CHECK3-NEXT: store i64 0, ptr [[TMP65]], align 8 +// CHECK3-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 10 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP66]], align 4 +// CHECK3-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 11 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP67]], align 4 +// CHECK3-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 12 +// CHECK3-NEXT: store i32 0, ptr [[TMP68]], align 4 +// CHECK3-NEXT: [[TMP69:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.region_id, ptr [[KERNEL_ARGS15]]) +// CHECK3-NEXT: [[TMP70:%.*]] = icmp ne i32 [[TMP69]], 0 +// CHECK3-NEXT: br i1 [[TMP70]], label [[OMP_OFFLOAD_FAILED16:%.*]], label [[OMP_OFFLOAD_CONT17:%.*]] +// CHECK3: omp_offload.failed16: +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34(i32 [[TMP43]], ptr @a) #[[ATTR2]] +// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT17]] +// CHECK3: omp_offload.cont17: +// CHECK3-NEXT: [[TMP71:%.*]] = load i32, ptr @a, align 4 +// CHECK3-NEXT: ret i32 [[TMP71]] +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28 +// CHECK3-SAME: (i32 noundef [[TE:%.*]], i32 noundef [[TH:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[TE_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TH_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// 
CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) +// CHECK3-NEXT: store i32 [[TE]], ptr [[TE_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TH]], ptr [[TH_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 +// CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.omp_outlined, ptr [[N_ADDR]], ptr [[TMP1]]) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] 
= alloca i32, align 4 +// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK3: omp.precond.then: +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// 
CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.omp_outlined.omp_outlined, i32 [[TMP15]], i32 [[TMP16]], ptr [[TMP0]], ptr [[TMP1]]) +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]]) +// CHECK3-NEXT: br label [[OMP_PRECOND_END]] +// CHECK3: omp.precond.end: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l28.omp_outlined.omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, 
align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK3: omp.precond.then: +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], 
align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP5]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i32 0, i32 [[TMP18]] +// CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK3: omp.body.continue: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]]) +// CHECK3-NEXT: br label [[OMP_PRECOND_END]] +// CHECK3: omp.precond.end: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34 +// CHECK3-SAME: (i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.omp_outlined, ptr [[N_ADDR]], ptr [[TMP0]]) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK3: omp.precond.then: +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.omp_outlined.omp_outlined, i32 [[TMP15]], i32 [[TMP16]], ptr [[TMP0]], ptr [[TMP1]]) +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]]) +// CHECK3-NEXT: br label [[OMP_PRECOND_END]] +// CHECK3: omp.precond.end: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21teams_argument_globali_l34.omp_outlined.omp_outlined +// CHECK3-SAME: (ptr 
noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: 
[[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK3: omp.precond.then: +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: 
cond.false: +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i32 0, i32 [[TMP18]] +// CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK3: omp.body.continue: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// 
CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]]) +// CHECK3-NEXT: br label [[OMP_PRECOND_END]] +// CHECK3: omp.precond.end: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK3-SAME: () #[[ATTR3:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK3-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@_Z15teams_local_argv +// CHECK9-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8 +// CHECK9-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8 +// CHECK9-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8 +// CHECK9-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [3 x i64], align 8 +// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK9-NEXT: store i32 100, ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK9-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave() +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8 +// CHECK9-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 4 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4 +// CHECK9-NEXT: 
call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes, i64 24, i1 false) +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: store ptr null, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK9-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP5]], ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK9-NEXT: store ptr null, ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: 
[[TMP18:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP20]], 0 +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[ADD]] to i64 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 2, ptr [[TMP23]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 3, ptr [[TMP24]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP16]], ptr [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP17]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP18]], ptr [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: store ptr null, ptr 
[[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK9-NEXT: store ptr null, ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 [[TMP22]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK9-NEXT: store i64 0, ptr [[TMP32]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK9-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP33]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK9-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP34]], align 4 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK9-NEXT: store i32 0, ptr [[TMP35]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9: omp_offload.failed: +// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72(i64 [[TMP4]], i64 [[TMP1]], ptr [[VLA]]) #[[ATTR3:[0-9]+]] +// CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK9: omp_offload.cont: +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP39:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// 
CHECK9-NEXT: call void @llvm.stackrestore(ptr [[TMP39]]) +// CHECK9-NEXT: ret i32 [[TMP38]] +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72 +// CHECK9-SAME: (i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.omp_outlined, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.omp_outlined +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: 
[[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK9: omp.precond.then: +// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9: cond.true: +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: br label [[COND_END:%.*]] +// CHECK9: cond.false: +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: br label [[COND_END]] +// CHECK9: cond.end: +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK9: omp.inner.for.cond: +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9: omp.inner.for.body: +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// 
CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.omp_outlined.omp_outlined, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK9: omp.inner.for.inc: +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK9: omp.inner.for.end: +// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK9: omp.loop.exit: +// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP23]]) +// CHECK9-NEXT: br label [[OMP_PRECOND_END]] +// CHECK9: omp.precond.end: +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.omp_outlined.omp_outlined +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: 
[[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: br i1 [[CMP]], label 
[[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK9: omp.precond.then: +// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9: cond.true: +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: br label [[COND_END:%.*]] +// CHECK9: cond.false: +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: br label [[COND_END]] +// CHECK9: cond.end: +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: 
store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK9: omp.inner.for.cond: +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9: omp.inner.for.body: +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK9: omp.body.continue: +// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK9: omp.inner.for.inc: +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK9: omp.inner.for.end: +// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK9: omp.loop.exit: +// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]]) +// CHECK9-NEXT: br label [[OMP_PRECOND_END]] +// CHECK9: omp.precond.end: +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK9-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: call void 
@__tgt_register_requires(i64 1) +// CHECK9-NEXT: ret void +// +// +// CHECK11-LABEL: define {{[^@]+}}@_Z15teams_local_argv +// CHECK11-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4 +// CHECK11-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4 +// CHECK11-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4 +// CHECK11-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [3 x i64], align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK11-NEXT: store i32 100, ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = call ptr @llvm.stacksave() +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[SAVED_STACK]], align 4 +// CHECK11-NEXT: [[VLA:%.*]] = alloca i32, i32 [[TMP0]], align 4 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[__VLA_EXPR0]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4 +// CHECK11-NEXT: [[TMP5:%.*]] = sext i32 [[TMP4]] to i64 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes, i32 24, i1 false) +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = 
getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr null, ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK11-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 2 +// CHECK11-NEXT: store i64 [[TMP5]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr null, ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], 
align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP20]], 0 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK11-NEXT: [[TMP22:%.*]] = zext i32 [[ADD]] to i64 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 2, ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 3, ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP16]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP17]], ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP18]], ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: store ptr null, ptr [[TMP29]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK11-NEXT: store ptr null, ptr [[TMP30]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 [[TMP22]], ptr [[TMP31]], align 8 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK11-NEXT: store i64 0, ptr [[TMP32]], align 8 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP33]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP34]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK11-NEXT: store i32 0, ptr [[TMP35]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK11-NEXT: br i1 [[TMP37]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11: omp_offload.failed: +// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72(i32 [[TMP3]], i32 [[TMP0]], ptr [[VLA]]) #[[ATTR3:[0-9]+]] +// CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK11: omp_offload.cont: +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i32 0 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 +// CHECK11-NEXT: call void @llvm.stackrestore(ptr [[TMP39]]) +// CHECK11-NEXT: ret i32 [[TMP38]] +// +// +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72 +// CHECK11-SAME: (i32 noundef 
[[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.omp_outlined, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: ret void +// +// +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.omp_outlined +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 
+// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK11: omp.precond.then: +// CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11: cond.true: +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: br label [[COND_END:%.*]] +// CHECK11: cond.false: +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: br label [[COND_END]] +// CHECK11: cond.end: +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK11: omp.inner.for.cond: +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11: omp.inner.for.body: +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.omp_outlined.omp_outlined, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK11: omp.inner.for.inc: +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK11: omp.inner.for.end: +// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK11: omp.loop.exit: +// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP21]]) +// CHECK11-NEXT: br label [[OMP_PRECOND_END]] +// CHECK11: omp.precond.end: +// CHECK11-NEXT: ret void +// +// +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z15teams_local_argv_l72.omp_outlined.omp_outlined +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, 
align 4 +// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: br i1 [[CMP]], label 
[[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK11: omp.precond.then: +// CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11: cond.true: +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: br label [[COND_END:%.*]] +// CHECK11: cond.false: +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: br label [[COND_END]] +// CHECK11: cond.end: +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] +// CHECK11: omp.inner.for.cond: +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11: omp.inner.for.body: +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK11: omp.body.continue: +// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK11: omp.inner.for.inc: +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK11: omp.inner.for.end: +// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK11: omp.loop.exit: +// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]]) +// CHECK11-NEXT: br label [[OMP_PRECOND_END]] +// CHECK11: omp.precond.end: +// CHECK11-NEXT: ret void +// +// +// CHECK11-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK11-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK11-NEXT: ret void +// +// +// CHECK17-LABEL: define 
{{[^@]+}}@_Z21teams_template_structv +// CHECK17-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK17-NEXT: entry: +// CHECK17-NEXT: [[V:%.*]] = alloca [[STRUCT_SS:%.*]], align 4 +// CHECK17-NEXT: [[CALL:%.*]] = call noundef signext i32 @_ZN2SSIiLi123ELx456EE3fooEv(ptr noundef nonnull align 4 dereferenceable(496) [[V]]) +// CHECK17-NEXT: ret i32 [[CALL]] +// +// +// CHECK17-LABEL: define {{[^@]+}}@_ZN2SSIiLi123ELx456EE3fooEv +// CHECK17-SAME: (ptr noundef nonnull align 4 dereferenceable(496) [[THIS:%.*]]) #[[ATTR0]] comdat align 2 { +// CHECK17-NEXT: entry: +// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK17-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK17-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 +// CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK17-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[A]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK17-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK17-NEXT: store i32 2, ptr [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK17-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK17-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK17-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK17-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK17-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK17-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK17-NEXT: store i64 123, ptr [[TMP13]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK17-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK17-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP15]], align 4 +// 
CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK17-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK17-NEXT: store i32 0, ptr [[TMP17]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.region_id, ptr [[KERNEL_ARGS]]) +// CHECK17-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK17-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK17: omp_offload.failed: +// CHECK17-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108(ptr [[THIS1]]) #[[ATTR2:[0-9]+]] +// CHECK17-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK17: omp_offload.cont: +// CHECK17-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A2]], i64 0, i64 0 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK17-NEXT: ret i32 [[TMP20]] +// +// +// CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108 +// CHECK17-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK17-NEXT: entry: +// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.omp_outlined, ptr [[TMP0]]) +// CHECK17-NEXT: ret void +// +// +// CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.omp_outlined +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK17-NEXT: entry: +// CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// 
CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK17: cond.true: +// CHECK17-NEXT: br label [[COND_END:%.*]] +// CHECK17: cond.false: +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: br label [[COND_END]] +// CHECK17: cond.end: +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK17: omp.inner.for.cond: +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17: omp.inner.for.body: +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK17: omp.inner.for.inc: +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK17: omp.inner.for.end: +// CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK17: omp.loop.exit: +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK17-NEXT: ret void +// +// +// CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.omp_outlined.omp_outlined +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK17-NEXT: entry: +// CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK17: cond.true: +// CHECK17-NEXT: br label [[COND_END:%.*]] +// CHECK17: cond.false: +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: br label [[COND_END]] +// CHECK17: cond.end: +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK17-NEXT: store i32 [[COND]], ptr 
[[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK17: omp.inner.for.cond: +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17: omp.inner.for.body: +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 +// CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK17: omp.body.continue: +// CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK17: omp.inner.for.inc: +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK17: omp.inner.for.end: +// CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK17: omp.loop.exit: +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK17-NEXT: ret void +// +// +// CHECK17-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK17-SAME: () #[[ATTR3:[0-9]+]] { +// CHECK17-NEXT: entry: +// CHECK17-NEXT: 
call void @__tgt_register_requires(i64 1) +// CHECK17-NEXT: ret void +// +// +// CHECK19-LABEL: define {{[^@]+}}@_Z21teams_template_structv +// CHECK19-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK19-NEXT: entry: +// CHECK19-NEXT: [[V:%.*]] = alloca [[STRUCT_SS:%.*]], align 4 +// CHECK19-NEXT: [[CALL:%.*]] = call noundef i32 @_ZN2SSIiLi123ELx456EE3fooEv(ptr noundef nonnull align 4 dereferenceable(496) [[V]]) +// CHECK19-NEXT: ret i32 [[CALL]] +// +// +// CHECK19-LABEL: define {{[^@]+}}@_ZN2SSIiLi123ELx456EE3fooEv +// CHECK19-SAME: (ptr noundef nonnull align 4 dereferenceable(496) [[THIS:%.*]]) #[[ATTR0]] comdat align 2 { +// CHECK19-NEXT: entry: +// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK19-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK19-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 +// CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK19-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[A]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK19-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr 
[[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK19-NEXT: store i32 2, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK19-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK19-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK19-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK19-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK19-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK19-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK19-NEXT: store i64 123, ptr [[TMP13]], align 8 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK19-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CHECK19-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, 
i32 10 +// CHECK19-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP15]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK19-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK19-NEXT: store i32 0, ptr [[TMP17]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.region_id, ptr [[KERNEL_ARGS]]) +// CHECK19-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK19-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK19: omp_offload.failed: +// CHECK19-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108(ptr [[THIS1]]) #[[ATTR2:[0-9]+]] +// CHECK19-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK19: omp_offload.cont: +// CHECK19-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A2]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK19-NEXT: ret i32 [[TMP20]] +// +// +// CHECK19-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108 +// CHECK19-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK19-NEXT: entry: +// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.omp_outlined, ptr [[TMP0]]) +// CHECK19-NEXT: ret void +// +// +// CHECK19-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.omp_outlined +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK19-NEXT: entry: +// CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// 
CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK19: cond.true: +// CHECK19-NEXT: br label [[COND_END:%.*]] +// CHECK19: cond.false: +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: br label [[COND_END]] +// CHECK19: cond.end: +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK19: omp.inner.for.cond: +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK19: omp.inner.for.body: +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.omp_outlined.omp_outlined, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK19: omp.inner.for.inc: +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK19: omp.inner.for.end: +// CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK19: omp.loop.exit: +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK19-NEXT: ret void +// +// +// CHECK19-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l108.omp_outlined.omp_outlined +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK19-NEXT: entry: +// CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK19: cond.true: +// CHECK19-NEXT: br label [[COND_END:%.*]] +// CHECK19: cond.false: +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: br label [[COND_END]] +// CHECK19: cond.end: +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 
[[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK19: omp.inner.for.cond: +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK19: omp.inner.for.body: +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 +// CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK19: omp.body.continue: +// CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK19: omp.inner.for.inc: +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK19: omp.inner.for.end: +// CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK19: omp.loop.exit: +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK19-NEXT: ret void +// +// +// CHECK19-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK19-SAME: () #[[ATTR3:[0-9]+]] { +// CHECK19-NEXT: entry: +// CHECK19-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK19-NEXT: ret void +// +// +// CHECK25-LABEL: define {{[^@]+}}@main +// CHECK25-SAME: (i32 noundef signext [[ARGC:%.*]], ptr noundef 
[[ARGV:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK25-NEXT: entry: +// CHECK25-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK25-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK25-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8 +// CHECK25-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8 +// CHECK25-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8 +// CHECK25-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [3 x i64], align 8 +// CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK25-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// CHECK25-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 +// CHECK25-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 +// CHECK25-NEXT: store i32 100, ptr [[N]], align 4 +// CHECK25-NEXT: [[TMP0:%.*]] = load i32, ptr [[N]], align 4 +// CHECK25-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK25-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave() +// CHECK25-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8 +// CHECK25-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 4 +// CHECK25-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = load i32, ptr [[N]], align 4 +// CHECK25-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 +// CHECK25-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP1]], 4 +// CHECK25-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 
@.offload_sizes, i64 24, i1 false) +// CHECK25-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK25-NEXT: store i64 [[TMP4]], ptr [[TMP6]], align 8 +// CHECK25-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK25-NEXT: store i64 [[TMP4]], ptr [[TMP7]], align 8 +// CHECK25-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK25-NEXT: store ptr null, ptr [[TMP8]], align 8 +// CHECK25-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK25-NEXT: store i64 [[TMP1]], ptr [[TMP9]], align 8 +// CHECK25-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK25-NEXT: store i64 [[TMP1]], ptr [[TMP10]], align 8 +// CHECK25-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK25-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK25-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK25-NEXT: store ptr [[VLA]], ptr [[TMP12]], align 8 +// CHECK25-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK25-NEXT: store ptr [[VLA]], ptr [[TMP13]], align 8 +// CHECK25-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 2 +// CHECK25-NEXT: store i64 [[TMP5]], ptr [[TMP14]], align 8 +// CHECK25-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK25-NEXT: store ptr null, ptr [[TMP15]], align 8 +// CHECK25-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i64], ptr 
[[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP19:%.*]] = load i32, ptr [[N]], align 4 +// CHECK25-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP20]], 0 +// CHECK25-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK25-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK25-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK25-NEXT: [[TMP22:%.*]] = zext i32 [[ADD]] to i64 +// CHECK25-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK25-NEXT: store i32 2, ptr [[TMP23]], align 4 +// CHECK25-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK25-NEXT: store i32 3, ptr [[TMP24]], align 4 +// CHECK25-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK25-NEXT: store ptr [[TMP16]], ptr [[TMP25]], align 8 +// CHECK25-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK25-NEXT: store ptr [[TMP17]], ptr [[TMP26]], align 8 +// CHECK25-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK25-NEXT: store ptr [[TMP18]], ptr [[TMP27]], align 8 +// CHECK25-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK25-NEXT: store ptr @.offload_maptypes, ptr [[TMP28]], align 8 +// CHECK25-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK25-NEXT: store ptr null, ptr [[TMP29]], align 8 +// 
CHECK25-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK25-NEXT: store ptr null, ptr [[TMP30]], align 8 +// CHECK25-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK25-NEXT: store i64 [[TMP22]], ptr [[TMP31]], align 8 +// CHECK25-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK25-NEXT: store i64 0, ptr [[TMP32]], align 8 +// CHECK25-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK25-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP33]], align 4 +// CHECK25-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK25-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP34]], align 4 +// CHECK25-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK25-NEXT: store i32 0, ptr [[TMP35]], align 4 +// CHECK25-NEXT: [[TMP36:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.region_id, ptr [[KERNEL_ARGS]]) +// CHECK25-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK25-NEXT: br i1 [[TMP37]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK25: omp_offload.failed: +// CHECK25-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161(i64 [[TMP4]], i64 [[TMP1]], ptr [[VLA]]) #[[ATTR3:[0-9]+]] +// CHECK25-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK25: omp_offload.cont: +// CHECK25-NEXT: [[TMP38:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK25-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10EEiT_(i32 noundef signext [[TMP38]]) +// CHECK25-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 +// CHECK25-NEXT: 
[[TMP39:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK25-NEXT: call void @llvm.stackrestore(ptr [[TMP39]]) +// CHECK25-NEXT: [[TMP40:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK25-NEXT: ret i32 [[TMP40]] +// +// +// CHECK25-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161 +// CHECK25-SAME: (i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK25-NEXT: entry: +// CHECK25-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK25-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// CHECK25-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK25-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.omp_outlined, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK25-NEXT: ret void +// +// +// CHECK25-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.omp_outlined +// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK25-NEXT: entry: +// CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK25-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 +// CHECK25-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load ptr, 
ptr [[A_ADDR]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK25-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK25-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK25-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK25-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK25-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK25: omp.precond.then: +// CHECK25-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK25-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK25-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK25-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK25-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK25: cond.true: +// CHECK25-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: br label [[COND_END:%.*]] +// CHECK25: cond.false: +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: br label [[COND_END]] +// CHECK25: cond.end: +// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK25-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK25-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK25: omp.inner.for.cond: +// CHECK25-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK25-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK25: omp.inner.for.body: +// CHECK25-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK25-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK25-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK25-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.omp_outlined.omp_outlined, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK25: omp.inner.for.inc: +// CHECK25-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK25-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK25: omp.inner.for.end: +// CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK25: omp.loop.exit: +// CHECK25-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP23]]) +// CHECK25-NEXT: br label [[OMP_PRECOND_END]] +// CHECK25: omp.precond.end: +// CHECK25-NEXT: ret void +// +// +// CHECK25-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.omp_outlined.omp_outlined +// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK25-NEXT: entry: +// CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK25-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK25-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK25-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK25-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK25-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 +// CHECK25-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK25-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK25-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK25-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK25-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK25-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label 
[[OMP_PRECOND_END:%.*]] +// CHECK25: omp.precond.then: +// CHECK25-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK25-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK25-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK25-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK25-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK25-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK25-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK25-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK25-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK25-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK25: cond.true: +// CHECK25-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: br label [[COND_END:%.*]] +// CHECK25: cond.false: +// CHECK25-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: br label [[COND_END]] +// CHECK25: cond.end: +// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK25-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK25-NEXT: store 
i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK25: omp.inner.for.cond: +// CHECK25-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK25-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK25: omp.inner.for.body: +// CHECK25-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK25-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 +// CHECK25-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK25-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 +// CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK25: omp.body.continue: +// CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK25: omp.inner.for.inc: +// CHECK25-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK25-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK25: omp.inner.for.end: +// CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK25: omp.loop.exit: +// CHECK25-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]]) +// CHECK25-NEXT: br label [[OMP_PRECOND_END]] +// CHECK25: omp.precond.end: +// CHECK25-NEXT: ret void +// +// +// CHECK25-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_ +// CHECK25-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat { +// 
CHECK25-NEXT: entry: +// CHECK25-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[A:%.*]] = alloca [10 x i32], align 4 +// CHECK25-NEXT: [[TE:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[TH:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[TE_CASTED:%.*]] = alloca i64, align 8 +// CHECK25-NEXT: [[TH_CASTED:%.*]] = alloca i64, align 8 +// CHECK25-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8 +// CHECK25-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8 +// CHECK25-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8 +// CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK25-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 +// CHECK25-NEXT: store i32 0, ptr [[TE]], align 4 +// CHECK25-NEXT: store i32 128, ptr [[TH]], align 4 +// CHECK25-NEXT: [[TMP0:%.*]] = load i32, ptr [[TE]], align 4 +// CHECK25-NEXT: store i32 [[TMP0]], ptr [[TE_CASTED]], align 4 +// CHECK25-NEXT: [[TMP1:%.*]] = load i64, ptr [[TE_CASTED]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load i32, ptr [[TH]], align 4 +// CHECK25-NEXT: store i32 [[TMP2]], ptr [[TH_CASTED]], align 4 +// CHECK25-NEXT: [[TMP3:%.*]] = load i64, ptr [[TH_CASTED]], align 8 +// CHECK25-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK25-NEXT: store i64 [[TMP1]], ptr [[TMP4]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK25-NEXT: store i64 [[TMP1]], ptr [[TMP5]], align 8 +// CHECK25-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK25-NEXT: store ptr null, ptr [[TMP6]], align 8 +// CHECK25-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK25-NEXT: store i64 [[TMP3]], ptr [[TMP7]], align 8 +// CHECK25-NEXT: [[TMP8:%.*]] = getelementptr 
inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK25-NEXT: store i64 [[TMP3]], ptr [[TMP8]], align 8 +// CHECK25-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK25-NEXT: store ptr null, ptr [[TMP9]], align 8 +// CHECK25-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK25-NEXT: store ptr [[A]], ptr [[TMP10]], align 8 +// CHECK25-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK25-NEXT: store ptr [[A]], ptr [[TMP11]], align 8 +// CHECK25-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK25-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK25-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP15:%.*]] = load i32, ptr [[TE]], align 4 +// CHECK25-NEXT: [[TMP16:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP15]], 0 +// CHECK25-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK25-NEXT: store i32 2, ptr [[TMP17]], align 4 +// CHECK25-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK25-NEXT: store i32 3, ptr [[TMP18]], align 4 +// CHECK25-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK25-NEXT: store ptr [[TMP13]], ptr [[TMP19]], align 8 +// CHECK25-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK25-NEXT: store ptr [[TMP14]], ptr [[TMP20]], align 8 +// CHECK25-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], 
i32 0, i32 4 +// CHECK25-NEXT: store ptr @.offload_sizes.1, ptr [[TMP21]], align 8 +// CHECK25-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK25-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP22]], align 8 +// CHECK25-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK25-NEXT: store ptr null, ptr [[TMP23]], align 8 +// CHECK25-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK25-NEXT: store ptr null, ptr [[TMP24]], align 8 +// CHECK25-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK25-NEXT: store i64 10, ptr [[TMP25]], align 8 +// CHECK25-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK25-NEXT: store i64 0, ptr [[TMP26]], align 8 +// CHECK25-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK25-NEXT: store [3 x i32] [[TMP16]], ptr [[TMP27]], align 4 +// CHECK25-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK25-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP28]], align 4 +// CHECK25-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK25-NEXT: store i32 0, ptr [[TMP29]], align 4 +// CHECK25-NEXT: [[TMP30:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 [[TMP15]], i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.region_id, ptr [[KERNEL_ARGS]]) +// CHECK25-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK25-NEXT: br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK25: omp_offload.failed: +// 
CHECK25-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150(i64 [[TMP1]], i64 [[TMP3]], ptr [[A]]) #[[ATTR3]] +// CHECK25-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK25: omp_offload.cont: +// CHECK25-NEXT: ret i32 0 +// +// +// CHECK25-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150 +// CHECK25-SAME: (i64 noundef [[TE:%.*]], i64 noundef [[TH:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK25-NEXT: entry: +// CHECK25-NEXT: [[TE_ADDR:%.*]] = alloca i64, align 8 +// CHECK25-NEXT: [[TH_ADDR:%.*]] = alloca i64, align 8 +// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) +// CHECK25-NEXT: store i64 [[TE]], ptr [[TE_ADDR]], align 8 +// CHECK25-NEXT: store i64 [[TH]], ptr [[TH_ADDR]], align 8 +// CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 +// CHECK25-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 +// CHECK25-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) +// CHECK25-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.omp_outlined, ptr [[TMP1]]) +// CHECK25-NEXT: ret void +// +// +// CHECK25-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.omp_outlined +// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK25-NEXT: entry: +// CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK25-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK25-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK25-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK25-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// 
CHECK25-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK25-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK25: cond.true: +// CHECK25-NEXT: br label [[COND_END:%.*]] +// CHECK25: cond.false: +// CHECK25-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: br label [[COND_END]] +// CHECK25: cond.end: +// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK25-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK25-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK25: omp.inner.for.cond: +// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK25-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK25: omp.inner.for.body: +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK25-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK25-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK25: omp.inner.for.inc: +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK25-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK25: omp.inner.for.end: +// CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK25: omp.loop.exit: +// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK25-NEXT: ret void +// +// +// CHECK25-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.omp_outlined.omp_outlined +// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK25-NEXT: entry: +// CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK25-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: store ptr [[DOTGLOBAL_TID_]], 
ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK25-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK25-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK25-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK25-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK25-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK25-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK25-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK25-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK25-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK25-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK25-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK25-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK25: cond.true: +// CHECK25-NEXT: br label [[COND_END:%.*]] +// CHECK25: cond.false: +// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: br label [[COND_END]] +// CHECK25: cond.end: +// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK25-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 
4 +// CHECK25-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK25-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK25: omp.inner.for.cond: +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK25-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK25: omp.inner.for.body: +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK25-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK25-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK25-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 +// CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK25: omp.body.continue: +// CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK25: omp.inner.for.inc: +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK25-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK25: omp.inner.for.end: +// CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK25: omp.loop.exit: +// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK25-NEXT: ret void +// +// +// CHECK25-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK25-SAME: () #[[ATTR6:[0-9]+]] { +// CHECK25-NEXT: entry: +// CHECK25-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK25-NEXT: ret void +// +// +// CHECK27-LABEL: define {{[^@]+}}@main 
+// CHECK27-SAME: (i32 noundef [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK27-NEXT: entry: +// CHECK27-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4 +// CHECK27-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4 +// CHECK27-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4 +// CHECK27-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [3 x i64], align 4 +// CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK27-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// CHECK27-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 +// CHECK27-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 4 +// CHECK27-NEXT: store i32 100, ptr [[N]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load i32, ptr [[N]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = call ptr @llvm.stacksave() +// CHECK27-NEXT: store ptr [[TMP1]], ptr [[SAVED_STACK]], align 4 +// CHECK27-NEXT: [[VLA:%.*]] = alloca i32, i32 [[TMP0]], align 4 +// CHECK27-NEXT: store i32 [[TMP0]], ptr [[__VLA_EXPR0]], align 4 +// CHECK27-NEXT: [[TMP2:%.*]] = load i32, ptr [[N]], align 4 +// CHECK27-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK27-NEXT: [[TMP4:%.*]] = mul nuw i32 [[TMP0]], 4 +// CHECK27-NEXT: [[TMP5:%.*]] = sext i32 [[TMP4]] to i64 +// CHECK27-NEXT: call void 
@llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes, i32 24, i1 false) +// CHECK27-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK27-NEXT: store i32 [[TMP3]], ptr [[TMP6]], align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK27-NEXT: store i32 [[TMP3]], ptr [[TMP7]], align 4 +// CHECK27-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK27-NEXT: store ptr null, ptr [[TMP8]], align 4 +// CHECK27-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK27-NEXT: store i32 [[TMP0]], ptr [[TMP9]], align 4 +// CHECK27-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK27-NEXT: store i32 [[TMP0]], ptr [[TMP10]], align 4 +// CHECK27-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK27-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK27-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK27-NEXT: store ptr [[VLA]], ptr [[TMP12]], align 4 +// CHECK27-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK27-NEXT: store ptr [[VLA]], ptr [[TMP13]], align 4 +// CHECK27-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 2 +// CHECK27-NEXT: store i64 [[TMP5]], ptr [[TMP14]], align 4 +// CHECK27-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK27-NEXT: store ptr null, ptr [[TMP15]], align 4 +// CHECK27-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// 
CHECK27-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP19:%.*]] = load i32, ptr [[N]], align 4 +// CHECK27-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP20]], 0 +// CHECK27-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK27-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK27-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK27-NEXT: [[TMP22:%.*]] = zext i32 [[ADD]] to i64 +// CHECK27-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK27-NEXT: store i32 2, ptr [[TMP23]], align 4 +// CHECK27-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK27-NEXT: store i32 3, ptr [[TMP24]], align 4 +// CHECK27-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK27-NEXT: store ptr [[TMP16]], ptr [[TMP25]], align 4 +// CHECK27-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK27-NEXT: store ptr [[TMP17]], ptr [[TMP26]], align 4 +// CHECK27-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK27-NEXT: store ptr [[TMP18]], ptr [[TMP27]], align 4 +// CHECK27-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK27-NEXT: store ptr @.offload_maptypes, ptr [[TMP28]], align 4 +// CHECK27-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// 
CHECK27-NEXT: store ptr null, ptr [[TMP29]], align 4 +// CHECK27-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK27-NEXT: store ptr null, ptr [[TMP30]], align 4 +// CHECK27-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK27-NEXT: store i64 [[TMP22]], ptr [[TMP31]], align 8 +// CHECK27-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK27-NEXT: store i64 0, ptr [[TMP32]], align 8 +// CHECK27-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK27-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP33]], align 4 +// CHECK27-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK27-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP34]], align 4 +// CHECK27-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK27-NEXT: store i32 0, ptr [[TMP35]], align 4 +// CHECK27-NEXT: [[TMP36:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.region_id, ptr [[KERNEL_ARGS]]) +// CHECK27-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK27-NEXT: br i1 [[TMP37]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK27: omp_offload.failed: +// CHECK27-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161(i32 [[TMP3]], i32 [[TMP0]], ptr [[VLA]]) #[[ATTR3:[0-9]+]] +// CHECK27-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK27: omp_offload.cont: +// CHECK27-NEXT: [[TMP38:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK27-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_(i32 noundef [[TMP38]]) +// CHECK27-NEXT: store i32 [[CALL]], 
ptr [[RETVAL]], align 4 +// CHECK27-NEXT: [[TMP39:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 +// CHECK27-NEXT: call void @llvm.stackrestore(ptr [[TMP39]]) +// CHECK27-NEXT: [[TMP40:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK27-NEXT: ret i32 [[TMP40]] +// +// +// CHECK27-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161 +// CHECK27-SAME: (i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK27-NEXT: entry: +// CHECK27-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK27-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 +// CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK27-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.omp_outlined, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK27-NEXT: ret void +// +// +// CHECK27-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.omp_outlined +// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK27-NEXT: entry: +// CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK27-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 +// CHECK27-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 +// CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 +// CHECK27-NEXT: [[TMP2:%.*]] = load ptr, 
ptr [[A_ADDR]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK27-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK27-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK27-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK27-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK27-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK27: omp.precond.then: +// CHECK27-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK27-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK27-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK27: cond.true: +// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: br label [[COND_END:%.*]] +// CHECK27: cond.false: +// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: br label [[COND_END]] +// CHECK27: cond.end: +// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK27-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK27: omp.inner.for.cond: +// CHECK27-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK27-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK27: omp.inner.for.body: +// CHECK27-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK27-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.omp_outlined.omp_outlined, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK27: omp.inner.for.inc: +// CHECK27-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK27-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK27: omp.inner.for.end: +// CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK27: omp.loop.exit: +// CHECK27-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP21]]) +// CHECK27-NEXT: br label [[OMP_PRECOND_END]] +// CHECK27: omp.precond.end: +// CHECK27-NEXT: ret void +// +// +// CHECK27-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l161.omp_outlined.omp_outlined +// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK27-NEXT: entry: +// CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK27-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK27-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK27-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 +// CHECK27-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 +// CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 +// CHECK27-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK27-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK27-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK27-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK27-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK27-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label 
[[OMP_PRECOND_END:%.*]] +// CHECK27: omp.precond.then: +// CHECK27-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK27-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK27-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK27-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK27-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK27: cond.true: +// CHECK27-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: br label [[COND_END:%.*]] +// CHECK27: cond.false: +// CHECK27-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: br label [[COND_END]] +// CHECK27: cond.end: +// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK27-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK27: 
omp.inner.for.cond: +// CHECK27-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK27-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK27: omp.inner.for.body: +// CHECK27-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK27-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 +// CHECK27-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK27-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 +// CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK27: omp.body.continue: +// CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK27: omp.inner.for.inc: +// CHECK27-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK27-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK27: omp.inner.for.end: +// CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK27: omp.loop.exit: +// CHECK27-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]]) +// CHECK27-NEXT: br label [[OMP_PRECOND_END]] +// CHECK27: omp.precond.end: +// CHECK27-NEXT: ret void +// +// +// CHECK27-LABEL: define {{[^@]+}}@_Z5tmainIiLi10EEiT_ +// CHECK27-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat { +// CHECK27-NEXT: entry: +// CHECK27-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[A:%.*]] = alloca [10 x i32], align 4 +// CHECK27-NEXT: [[TE:%.*]] = alloca i32, align 
4 +// CHECK27-NEXT: [[TH:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[TE_CASTED:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[TH_CASTED:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4 +// CHECK27-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4 +// CHECK27-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4 +// CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK27-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 +// CHECK27-NEXT: store i32 0, ptr [[TE]], align 4 +// CHECK27-NEXT: store i32 128, ptr [[TH]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load i32, ptr [[TE]], align 4 +// CHECK27-NEXT: store i32 [[TMP0]], ptr [[TE_CASTED]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = load i32, ptr [[TE_CASTED]], align 4 +// CHECK27-NEXT: [[TMP2:%.*]] = load i32, ptr [[TH]], align 4 +// CHECK27-NEXT: store i32 [[TMP2]], ptr [[TH_CASTED]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_CASTED]], align 4 +// CHECK27-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK27-NEXT: store i32 [[TMP1]], ptr [[TMP4]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK27-NEXT: store i32 [[TMP1]], ptr [[TMP5]], align 4 +// CHECK27-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK27-NEXT: store ptr null, ptr [[TMP6]], align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK27-NEXT: store i32 [[TMP3]], ptr [[TMP7]], align 4 +// CHECK27-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK27-NEXT: store i32 [[TMP3]], ptr [[TMP8]], align 4 +// CHECK27-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr 
[[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK27-NEXT: store ptr null, ptr [[TMP9]], align 4 +// CHECK27-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK27-NEXT: store ptr [[A]], ptr [[TMP10]], align 4 +// CHECK27-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK27-NEXT: store ptr [[A]], ptr [[TMP11]], align 4 +// CHECK27-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK27-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK27-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP15:%.*]] = load i32, ptr [[TE]], align 4 +// CHECK27-NEXT: [[TMP16:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP15]], 0 +// CHECK27-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK27-NEXT: store i32 2, ptr [[TMP17]], align 4 +// CHECK27-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK27-NEXT: store i32 3, ptr [[TMP18]], align 4 +// CHECK27-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK27-NEXT: store ptr [[TMP13]], ptr [[TMP19]], align 4 +// CHECK27-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK27-NEXT: store ptr [[TMP14]], ptr [[TMP20]], align 4 +// CHECK27-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK27-NEXT: store ptr @.offload_sizes.1, ptr [[TMP21]], align 4 +// CHECK27-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 5 +// CHECK27-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP22]], align 4 +// CHECK27-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK27-NEXT: store ptr null, ptr [[TMP23]], align 4 +// CHECK27-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK27-NEXT: store ptr null, ptr [[TMP24]], align 4 +// CHECK27-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK27-NEXT: store i64 10, ptr [[TMP25]], align 8 +// CHECK27-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK27-NEXT: store i64 0, ptr [[TMP26]], align 8 +// CHECK27-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK27-NEXT: store [3 x i32] [[TMP16]], ptr [[TMP27]], align 4 +// CHECK27-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK27-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP28]], align 4 +// CHECK27-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK27-NEXT: store i32 0, ptr [[TMP29]], align 4 +// CHECK27-NEXT: [[TMP30:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 [[TMP15]], i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.region_id, ptr [[KERNEL_ARGS]]) +// CHECK27-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK27-NEXT: br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK27: omp_offload.failed: +// CHECK27-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150(i32 [[TMP1]], i32 [[TMP3]], ptr [[A]]) #[[ATTR3]] +// CHECK27-NEXT: br label 
[[OMP_OFFLOAD_CONT]] +// CHECK27: omp_offload.cont: +// CHECK27-NEXT: ret i32 0 +// +// +// CHECK27-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150 +// CHECK27-SAME: (i32 noundef [[TE:%.*]], i32 noundef [[TH:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK27-NEXT: entry: +// CHECK27-NEXT: [[TE_ADDR:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[TH_ADDR:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) +// CHECK27-NEXT: store i32 [[TE]], ptr [[TE_ADDR]], align 4 +// CHECK27-NEXT: store i32 [[TH]], ptr [[TH_ADDR]], align 4 +// CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK27-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 +// CHECK27-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) +// CHECK27-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.omp_outlined, ptr [[TMP1]]) +// CHECK27-NEXT: ret void +// +// +// CHECK27-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.omp_outlined +// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK27-NEXT: entry: +// CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK27-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK27-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK27-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// 
CHECK27-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK27-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK27: cond.true: +// CHECK27-NEXT: br label [[COND_END:%.*]] +// CHECK27: cond.false: +// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: br label [[COND_END]] +// CHECK27: cond.end: +// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK27-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK27: omp.inner.for.cond: +// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK27-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK27: omp.inner.for.body: +// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.omp_outlined.omp_outlined, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK27: omp.inner.for.inc: +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK27-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK27: omp.inner.for.end: +// CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK27: omp.loop.exit: +// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK27-NEXT: ret void +// +// +// CHECK27-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10EEiT__l150.omp_outlined.omp_outlined +// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK27-NEXT: entry: +// CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: store ptr [[DOTGLOBAL_TID_]], 
ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK27-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK27-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK27-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK27-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK27-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK27-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK27-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK27-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK27-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK27: cond.true: +// CHECK27-NEXT: br label [[COND_END:%.*]] +// CHECK27: cond.false: +// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: br label [[COND_END]] +// CHECK27: cond.end: +// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK27-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP7]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK27: omp.inner.for.cond: +// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK27-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK27: omp.inner.for.body: +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK27-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK27-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 +// CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK27: omp.body.continue: +// CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK27: omp.inner.for.inc: +// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK27-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK27: omp.inner.for.end: +// CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK27: omp.loop.exit: +// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK27-NEXT: ret void +// +// +// CHECK27-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK27-SAME: () #[[ATTR6:[0-9]+]] { +// CHECK27-NEXT: entry: +// CHECK27-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK27-NEXT: ret void +// diff --git a/clang/test/OpenMP/teams_generic_loop_codegen.cpp b/clang/test/OpenMP/teams_generic_loop_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/teams_generic_loop_codegen.cpp @@ -0,0 +1,770 @@ 
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=IR + +// Check same results after serialization round-trip +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-pch -o %t %s +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR-PCH + +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER +int foo() { + int i; + int j; + int sum[10][10]; + + #pragma omp teams loop reduction(+:sum) collapse(2) bind(parallel) \ + order(concurrent) lastprivate(j) + for(i=0; i<10; i++) + for(j=0; j<10; j++) + sum[i][j] += i; + + return 0; +} +#endif +// IR-LABEL: define {{[^@]+}}@_Z3foov +// IR-SAME: () #[[ATTR0:[0-9]+]] { +// IR-NEXT: entry: +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-NEXT: [[SUM:%.*]] = alloca [10 x [10 x i32]], align 16 +// IR-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 2, ptr @_Z3foov.omp_outlined, ptr [[J]], ptr [[SUM]]) +// IR-NEXT: ret i32 0 +// +// +// IR-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined +// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1:[0-9]+]] { +// IR-NEXT: entry: +// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[J_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[SUM1:%.*]] = alloca [10 x [10 x i32]], align 16 +// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J3:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J4:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 +// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-NEXT: store ptr [[J]], ptr [[J_ADDR]], align 8 +// IR-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 +// IR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[J_ADDR]], align 8 +// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 +// IR-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM1]], i32 0, i32 0, i32 0 +// IR-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 +// IR-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP2]] +// 
IR-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] +// IR: omp.arrayinit.body: +// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// IR-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// IR-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]] +// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] +// IR: omp.arrayinit.done: +// IR-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// IR-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR: cond.true: +// IR-NEXT: br label [[COND_END:%.*]] +// IR: cond.false: +// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: br label [[COND_END]] +// IR: cond.end: +// IR-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] +// IR: omp.inner.for.cond: +// IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// IR-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR: omp.inner.for.body: +// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 4, ptr @_Z3foov.omp_outlined.omp_outlined, i64 [[TMP11]], i64 [[TMP13]], ptr [[J3]], ptr [[SUM1]]) +// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR: omp.inner.for.inc: +// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// IR-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR: omp.inner.for.end: +// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR: omp.loop.exit: +// IR-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]]) +// IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// IR-NEXT: br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// IR: .omp.lastprivate.then: +// IR-NEXT: store i32 10, ptr [[J3]], align 4 +// IR-NEXT: [[TMP20:%.*]] = load i32, ptr [[J3]], align 4 +// IR-NEXT: store i32 [[TMP20]], ptr [[TMP0]], align 4 +// IR-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// IR: 
.omp.lastprivate.done: +// IR-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// IR-NEXT: store ptr [[SUM1]], ptr [[TMP21]], align 8 +// IR-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// IR-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z3foov.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// IR-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// IR-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// IR-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// IR-NEXT: ] +// IR: .omp.reduction.case1: +// IR-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100 +// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP25]] +// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// IR: omp.arraycpy.body: +// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST6:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-NEXT: [[TMP26:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4 +// IR-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// IR-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// IR-NEXT: store i32 [[ADD7]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4 +// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], i32 1 +// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// IR-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr 
[[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP25]] +// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] +// IR: omp.arraycpy.done10: +// IR-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var) +// IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// IR: .omp.reduction.case2: +// IR-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100 +// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq ptr [[TMP1]], [[TMP28]] +// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY11]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY12:%.*]] +// IR: omp.arraycpy.body12: +// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY12]] ] +// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY12]] ] +// IR-NEXT: [[TMP29:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], align 4 +// IR-NEXT: [[TMP30:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 [[TMP29]] monotonic, align 4 +// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 +// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT16]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1 +// IR-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP28]] +// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY12]] +// IR: omp.arraycpy.done18: +// IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// IR: .omp.reduction.default: +// IR-NEXT: ret void +// +// +// IR-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.omp_outlined +// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef 
[[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] { +// IR-NEXT: entry: +// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// IR-NEXT: [[J_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J3:%.*]] = alloca i32, align 4 +// IR-NEXT: [[SUM4:%.*]] = alloca [10 x [10 x i32]], align 16 +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[J5:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 +// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// IR-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// IR-NEXT: store ptr [[J]], ptr [[J_ADDR]], align 8 +// IR-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 +// IR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[J_ADDR]], align 8 +// IR-NEXT: [[TMP1:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 +// IR-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// IR-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// IR-NEXT: [[CONV:%.*]] = trunc i64 [[TMP2]] to i32 +// IR-NEXT: 
[[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// IR-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP3]] to i32 +// IR-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// IR-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i32 0, i32 0, i32 0 +// IR-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 +// IR-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]] +// IR-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] +// IR: omp.arrayinit.body: +// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// IR-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// IR-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] +// IR: omp.arrayinit.done: +// IR-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// IR-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 99 +// IR-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR: cond.true: +// IR-NEXT: br label [[COND_END:%.*]] +// IR: cond.false: +// IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: 
br label [[COND_END]] +// IR: cond.end: +// IR-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// IR-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR: omp.inner.for.cond: +// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] +// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// IR-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR: omp.inner.for.body: +// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP12]], 10 +// IR-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// IR-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// IR-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: [[DIV7:%.*]] = sdiv i32 [[TMP14]], 10 +// IR-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 10 +// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], [[MUL8]] +// IR-NEXT: [[MUL9:%.*]] = mul nsw i32 [[SUB]], 1 +// IR-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] +// IR-NEXT: store i32 [[ADD10]], ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 +// IR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i64 0, 
i64 [[IDXPROM]] +// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP17]] to i64 +// IR-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] +// IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP18]], [[TMP15]] +// IR-NEXT: store i32 [[ADD13]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR: omp.body.continue: +// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR: omp.inner.for.inc: +// IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP19]], 1 +// IR-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// IR: omp.inner.for.end: +// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR: omp.loop.exit: +// IR-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]]) +// IR-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// IR-NEXT: store ptr [[SUM4]], ptr [[TMP22]], align 8 +// IR-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// IR-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z3foov.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// IR-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// IR-NEXT: i32 1, label 
[[DOTOMP_REDUCTION_CASE1:%.*]] +// IR-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// IR-NEXT: ] +// IR: .omp.reduction.case1: +// IR-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100 +// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP26]] +// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// IR: omp.arraycpy.body: +// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 +// IR-NEXT: [[TMP28:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// IR-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// IR-NEXT: store i32 [[ADD16]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 +// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 +// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// IR-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP26]] +// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]] +// IR: omp.arraycpy.done19: +// IR-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) +// IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// IR: .omp.reduction.case2: +// IR-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100 +// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY20:%.*]] = icmp eq ptr [[TMP1]], [[TMP29]] +// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY20]], label [[OMP_ARRAYCPY_DONE27:%.*]], label [[OMP_ARRAYCPY_BODY21:%.*]] +// IR: 
omp.arraycpy.body21: +// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST22:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY21]] ] +// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST23:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY21]] ] +// IR-NEXT: [[TMP30:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], align 4 +// IR-NEXT: [[TMP31:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 [[TMP30]] monotonic, align 4 +// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT24]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 1 +// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT25]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], i32 1 +// IR-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP29]] +// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY21]] +// IR: omp.arraycpy.done27: +// IR-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// IR: .omp.reduction.default: +// IR-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// IR-NEXT: br i1 [[TMP33]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// IR: .omp.lastprivate.then: +// IR-NEXT: store i32 10, ptr [[J3]], align 4 +// IR-NEXT: [[TMP34:%.*]] = load i32, ptr [[J3]], align 4 +// IR-NEXT: store i32 [[TMP34]], ptr [[TMP0]], align 4 +// IR-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// IR: .omp.lastprivate.done: +// IR-NEXT: ret void +// +// +// IR-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.omp_outlined.omp.reduction.reduction_func +// IR-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { +// IR-NEXT: entry: +// IR-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 +// IR-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 +// 
IR-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 +// IR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// IR-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// IR-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// IR-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 +// IR-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// IR-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100 +// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] +// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// IR: omp.arraycpy.body: +// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// IR-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// IR-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] +// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// IR: omp.arraycpy.done2: +// IR-NEXT: ret void +// +// +// IR-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.omp.reduction.reduction_func +// IR-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] { +// IR-NEXT: 
entry: +// IR-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 +// IR-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 +// IR-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 +// IR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// IR-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// IR-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// IR-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// IR-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 +// IR-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// IR-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100 +// IR-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] +// IR-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// IR: omp.arraycpy.body: +// IR-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// IR-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// IR-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// IR-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// IR-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] +// IR-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// IR: omp.arraycpy.done2: +// IR-NEXT: ret void +// +// 
+// IR-PCH-LABEL: define {{[^@]+}}@_Z3foov +// IR-PCH-SAME: () #[[ATTR0:[0-9]+]] { +// IR-PCH-NEXT: entry: +// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[SUM:%.*]] = alloca [10 x [10 x i32]], align 16 +// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 2, ptr @_Z3foov.omp_outlined, ptr [[J]], ptr [[SUM]]) +// IR-PCH-NEXT: ret i32 0 +// +// +// IR-PCH-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined +// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1:[0-9]+]] { +// IR-PCH-NEXT: entry: +// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[J_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[SUM1:%.*]] = alloca [10 x [10 x i32]], align 16 +// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[J3:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[J4:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 +// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[J]], ptr [[J_ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[SUM]], ptr 
[[SUM_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[J_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 +// IR-PCH-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM1]], i32 0, i32 0, i32 0 +// IR-PCH-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 +// IR-PCH-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP2]] +// IR-PCH-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] +// IR-PCH: omp.arrayinit.body: +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// IR-PCH-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]] +// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] +// IR-PCH: omp.arrayinit.done: +// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-PCH-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// IR-PCH-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR-PCH: 
cond.true: +// IR-PCH-NEXT: br label [[COND_END:%.*]] +// IR-PCH: cond.false: +// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-NEXT: br label [[COND_END]] +// IR-PCH: cond.end: +// IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// IR-PCH-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-PCH-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-PCH: omp.inner.for.cond: +// IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// IR-PCH-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR-PCH: omp.inner.for.body: +// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// IR-PCH-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// IR-PCH-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// IR-PCH-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 4, ptr @_Z3foov.omp_outlined.omp_outlined, i64 [[TMP11]], i64 [[TMP13]], ptr [[J3]], ptr [[SUM1]]) +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR-PCH: omp.inner.for.inc: +// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// IR-PCH-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR-PCH: omp.inner.for.end: +// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR-PCH: omp.loop.exit: +// IR-PCH-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]]) +// IR-PCH-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-PCH-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// IR-PCH-NEXT: br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// IR-PCH: .omp.lastprivate.then: +// IR-PCH-NEXT: store i32 10, ptr [[J3]], align 4 +// IR-PCH-NEXT: [[TMP20:%.*]] = load i32, ptr [[J3]], align 4 +// IR-PCH-NEXT: store i32 [[TMP20]], ptr [[TMP0]], align 4 +// IR-PCH-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// IR-PCH: .omp.lastprivate.done: +// IR-PCH-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// IR-PCH-NEXT: store ptr [[SUM1]], ptr [[TMP21]], align 8 +// IR-PCH-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// IR-PCH-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP23]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z3foov.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// 
IR-PCH-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// IR-PCH-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// IR-PCH-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// IR-PCH-NEXT: ] +// IR-PCH: .omp.reduction.case1: +// IR-PCH-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP25]] +// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// IR-PCH: omp.arraycpy.body: +// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST6:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-PCH-NEXT: [[TMP26:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4 +// IR-PCH-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// IR-PCH-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// IR-PCH-NEXT: store i32 [[ADD7]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], align 4 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], i32 1 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP25]] +// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] +// IR-PCH: omp.arraycpy.done10: +// IR-PCH-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP23]], ptr @.gomp_critical_user_.reduction.var) +// IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// IR-PCH: .omp.reduction.case2: +// IR-PCH-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100 +// IR-PCH-NEXT: 
[[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq ptr [[TMP1]], [[TMP28]] +// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY11]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY12:%.*]] +// IR-PCH: omp.arraycpy.body12: +// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi ptr [ [[SUM1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY12]] ] +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY12]] ] +// IR-PCH-NEXT: [[TMP29:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], align 4 +// IR-PCH-NEXT: [[TMP30:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 [[TMP29]] monotonic, align 4 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT16]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP28]] +// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY12]] +// IR-PCH: omp.arraycpy.done18: +// IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// IR-PCH: .omp.reduction.default: +// IR-PCH-NEXT: ret void +// +// +// IR-PCH-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.omp_outlined +// IR-PCH-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[SUM:%.*]]) #[[ATTR1]] { +// IR-PCH-NEXT: entry: +// IR-PCH-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: 
[[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// IR-PCH-NEXT: [[J_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[J3:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[SUM4:%.*]] = alloca [10 x [10 x i32]], align 16 +// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[J5:%.*]] = alloca i32, align 4 +// IR-PCH-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 +// IR-PCH-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// IR-PCH-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[J]], ptr [[J_ADDR]], align 8 +// IR-PCH-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP0:%.*]] = load ptr, ptr [[J_ADDR]], align 8 +// IR-PCH-NEXT: [[TMP1:%.*]] = load ptr, ptr [[SUM_ADDR]], align 8 +// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// IR-PCH-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 +// IR-PCH-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// IR-PCH-NEXT: [[CONV:%.*]] = trunc i64 [[TMP2]] to i32 +// IR-PCH-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// IR-PCH-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP3]] to i32 +// IR-PCH-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// IR-PCH-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 +// IR-PCH-NEXT: store i32 1, 
ptr [[DOTOMP_STRIDE]], align 4 +// IR-PCH-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-PCH-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i32 0, i32 0, i32 0 +// IR-PCH-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 100 +// IR-PCH-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]] +// IR-PCH-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] +// IR-PCH: omp.arrayinit.body: +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// IR-PCH-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] +// IR-PCH: omp.arrayinit.done: +// IR-PCH-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// IR-PCH-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-PCH-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 99 +// IR-PCH-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR-PCH: cond.true: +// IR-PCH-NEXT: br label [[COND_END:%.*]] +// IR-PCH: cond.false: +// IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-PCH-NEXT: br label [[COND_END]] +// IR-PCH: cond.end: +// IR-PCH-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// IR-PCH-NEXT: store i32 [[COND]], 
ptr [[DOTOMP_UB]], align 4 +// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// IR-PCH-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR-PCH: omp.inner.for.cond: +// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] +// IR-PCH-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// IR-PCH-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR-PCH: omp.inner.for.body: +// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP12]], 10 +// IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// IR-PCH-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: [[DIV7:%.*]] = sdiv i32 [[TMP14]], 10 +// IR-PCH-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 10 +// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], [[MUL8]] +// IR-PCH-NEXT: [[MUL9:%.*]] = mul nsw i32 [[SUB]], 1 +// IR-PCH-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] +// IR-PCH-NEXT: store i32 [[ADD10]], ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 +// IR-PCH-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[SUM4]], i64 0, i64 [[IDXPROM]] +// IR-PCH-NEXT: [[TMP17:%.*]] = load 
i32, ptr [[J3]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP17]] to i64 +// IR-PCH-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] +// IR-PCH-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP18]], [[TMP15]] +// IR-PCH-NEXT: store i32 [[ADD13]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR-PCH: omp.body.continue: +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR-PCH: omp.inner.for.inc: +// IR-PCH-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP19]], 1 +// IR-PCH-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// IR-PCH: omp.inner.for.end: +// IR-PCH-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR-PCH: omp.loop.exit: +// IR-PCH-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// IR-PCH-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]]) +// IR-PCH-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// IR-PCH-NEXT: store ptr [[SUM4]], ptr [[TMP22]], align 8 +// IR-PCH-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-PCH-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// IR-PCH-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_Z3foov.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// IR-PCH-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ 
+// IR-PCH-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// IR-PCH-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// IR-PCH-NEXT: ] +// IR-PCH: .omp.reduction.case1: +// IR-PCH-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP26]] +// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// IR-PCH: omp.arraycpy.body: +// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-PCH-NEXT: [[TMP27:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 +// IR-PCH-NEXT: [[TMP28:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// IR-PCH-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// IR-PCH-NEXT: store i32 [[ADD16]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 4 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP26]] +// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY]] +// IR-PCH: omp.arraycpy.done19: +// IR-PCH-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) +// IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// IR-PCH: .omp.reduction.case2: +// IR-PCH-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP1]], i64 100 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY20:%.*]] = icmp eq ptr [[TMP1]], [[TMP29]] +// IR-PCH-NEXT: 
br i1 [[OMP_ARRAYCPY_ISEMPTY20]], label [[OMP_ARRAYCPY_DONE27:%.*]], label [[OMP_ARRAYCPY_BODY21:%.*]] +// IR-PCH: omp.arraycpy.body21: +// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST22:%.*]] = phi ptr [ [[SUM4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY21]] ] +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST23:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY21]] ] +// IR-PCH-NEXT: [[TMP30:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], align 4 +// IR-PCH-NEXT: [[TMP31:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 [[TMP30]] monotonic, align 4 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT24]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 1 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT25]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST22]], i32 1 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP29]] +// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY21]] +// IR-PCH: omp.arraycpy.done27: +// IR-PCH-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// IR-PCH: .omp.reduction.default: +// IR-PCH-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-PCH-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// IR-PCH-NEXT: br i1 [[TMP33]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// IR-PCH: .omp.lastprivate.then: +// IR-PCH-NEXT: store i32 10, ptr [[J3]], align 4 +// IR-PCH-NEXT: [[TMP34:%.*]] = load i32, ptr [[J3]], align 4 +// IR-PCH-NEXT: store i32 [[TMP34]], ptr [[TMP0]], align 4 +// IR-PCH-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// IR-PCH: .omp.lastprivate.done: +// IR-PCH-NEXT: ret void +// +// +// IR-PCH-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.omp_outlined.omp.reduction.reduction_func +// IR-PCH-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) 
#[[ATTR3:[0-9]+]] { +// IR-PCH-NEXT: entry: +// IR-PCH-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 +// IR-PCH-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 +// IR-PCH-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// IR-PCH-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// IR-PCH-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// IR-PCH-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 +// IR-PCH-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// IR-PCH-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] +// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// IR-PCH: omp.arraycpy.body: +// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// IR-PCH-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] +// IR-PCH-NEXT: br i1 
[[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// IR-PCH: omp.arraycpy.done2: +// IR-PCH-NEXT: ret void +// +// +// IR-PCH-LABEL: define {{[^@]+}}@_Z3foov.omp_outlined.omp.reduction.reduction_func +// IR-PCH-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] { +// IR-PCH-NEXT: entry: +// IR-PCH-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 +// IR-PCH-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 +// IR-PCH-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 +// IR-PCH-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// IR-PCH-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// IR-PCH-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// IR-PCH-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// IR-PCH-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 +// IR-PCH-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// IR-PCH-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 100 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] +// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// IR-PCH: omp.arraycpy.body: +// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// IR-PCH-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// IR-PCH-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = 
getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// IR-PCH-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] +// IR-PCH-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// IR-PCH: omp.arraycpy.done2: +// IR-PCH-NEXT: ret void +// diff --git a/clang/test/OpenMP/teams_generic_loop_collapse_codgen.cpp b/clang/test/OpenMP/teams_generic_loop_collapse_codgen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/teams_generic_loop_collapse_codgen.cpp @@ -0,0 +1,1894 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// Test host codegen. 
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK1 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK3 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK3 + +// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: 
%clang_cc1 -DCK1 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +#ifdef CK1 + +template <typename T, int X, long long Y> +struct SS{ + T a[X][Y]; + + int foo(void) { + + #pragma omp target + #pragma omp teams loop collapse(2) + for(int i = 0; i < X; i++) { + for(int j = 0; j < Y; j++) { + a[i][j] = (T)0; + } + } + + // discard loop variables not needed here + + + return a[0][0]; + } +}; + +int teams_template_struct(void) { + SS<int, 123, 456> V; + return V.foo(); + +} +#endif // CK1 + +// Test host codegen. +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK9 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK9 +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK11 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK11 + +// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown
-fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +#ifdef CK2 + +template <typename T, int n, int m> +int tmain(T argc) { + T a[n][m]; + #pragma omp target + #pragma omp teams loop collapse(2) + for(int i = 0; i < n; i++) { + for(int j = 0; j < m; j++) { + a[i][j] = (T)0; + } + } + return 0; +} + +int main (int argc, char **argv) { + int n = 100; + int m = 2; + int a[n][m]; + #pragma omp target + #pragma omp teams loop collapse(2) + for(int i = 0; i < n; i++) { + for(int j = 0; j < m; j++) { + a[i][j] = 0; + } + } + return tmain<int, 10, 2>(argc); +} + + + + + + + + +// discard loop variables not needed here + + +#endif // CK2 +#endif // #ifndef HEADER +// CHECK1-LABEL: define {{[^@]+}}@_Z21teams_template_structv +// CHECK1-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[V:%.*]] = alloca [[STRUCT_SS:%.*]], align 4 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_ZN2SSIiLi123ELx456EE3fooEv(ptr noundef nonnull align 4 dereferenceable(224352) [[V]]) +// CHECK1-NEXT: ret i32 [[CALL]] +// +// +// CHECK1-LABEL: define {{[^@]+}}@_ZN2SSIiLi123ELx456EE3fooEv +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(224352) [[THIS:%.*]]) #[[ATTR0]] comdat align 2 { +// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 2, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 56088, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP15]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, ptr [[TMP17]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr 
@.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK1-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1: omp_offload.failed: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK1: omp_offload.cont: +// CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A3]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 +// CHECK1-NEXT: ret i32 [[TMP20]] +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28 +// CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined, ptr [[TMP0]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 56087, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = 
alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] 
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 456 +// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 456 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[J]], align 4 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] +// CHECK1-NEXT: store i32 0, ptr 
[[ARRAYIDX9]], align 4 +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK1-SAME: () #[[ATTR3:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK1-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_Z21teams_template_structv +// CHECK3-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[V:%.*]] = alloca [[STRUCT_SS:%.*]], align 4 +// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_ZN2SSIiLi123ELx456EE3fooEv(ptr noundef nonnull align 4 dereferenceable(224352) [[V]]) +// CHECK3-NEXT: ret i32 [[CALL]] +// +// +// CHECK3-LABEL: define {{[^@]+}}@_ZN2SSIiLi123ELx456EE3fooEv +// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(224352) [[THIS:%.*]]) #[[ATTR0]] comdat align 2 { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: 
[[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 2, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 56088, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK3-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK3-NEXT: store i32 0, ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK3-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3: omp_offload.failed: +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR2:[0-9]+]] +// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK3: omp_offload.cont: +// CHECK3-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A3]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 +// CHECK3-NEXT: ret i32 [[TMP20]] +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28 +// CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined, ptr [[TMP0]]) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// 
CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 56087, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: 
[[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined.omp_outlined, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.omp_outlined.omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: 
[[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 +// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = 
phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 +// CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] +// CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[J]], align 4 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP13]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] +// CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4 +// CHECK3-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] +// CHECK3: omp.body.continue: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK3-SAME: () #[[ATTR3:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK3-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@main +// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[M:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[M_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x ptr], align 8 +// CHECK9-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 8 +// CHECK9-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8 +// CHECK9-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8 +// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// 
CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// CHECK9-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 +// CHECK9-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 +// CHECK9-NEXT: store i32 100, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 2, ptr [[M]], align 4 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[M]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 +// CHECK9-NEXT: [[TMP4:%.*]] = call ptr @llvm.stacksave() +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[SAVED_STACK]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP3]] +// CHECK9-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP5]], align 4 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8 +// CHECK9-NEXT: store i64 [[TMP3]], ptr [[__VLA_EXPR1]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[N_CASTED]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[M_CASTED]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[M_CASTED]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP1]], [[TMP3]] +// CHECK9-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes, i64 40, i1 false) +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr 
[[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK9-NEXT: store ptr null, ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP18]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP19]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK9-NEXT: store ptr null, ptr [[TMP20]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP21]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP22]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK9-NEXT: store ptr null, ptr [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr 
[[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 +// CHECK9-NEXT: store ptr null, ptr [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP32]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP35]], 1 +// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 2, ptr [[TMP36]], align 
4 +// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 5, ptr [[TMP37]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP28]], ptr [[TMP38]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP29]], ptr [[TMP39]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP30]], ptr [[TMP40]], align 8 +// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP41]], align 8 +// CHECK9-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: store ptr null, ptr [[TMP42]], align 8 +// CHECK9-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK9-NEXT: store ptr null, ptr [[TMP43]], align 8 +// CHECK9-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 [[ADD]], ptr [[TMP44]], align 8 +// CHECK9-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK9-NEXT: store i64 0, ptr [[TMP45]], align 8 +// CHECK9-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK9-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP46]], align 4 +// CHECK9-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK9-NEXT: 
store [3 x i32] zeroinitializer, ptr [[TMP47]], align 4 +// CHECK9-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK9-NEXT: store i32 0, ptr [[TMP48]], align 4 +// CHECK9-NEXT: [[TMP49:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 +// CHECK9-NEXT: br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9: omp_offload.failed: +// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83(i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP1]], i64 [[TMP3]], ptr [[VLA]]) #[[ATTR3:[0-9]+]] +// CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK9: omp_offload.cont: +// CHECK9-NEXT: [[TMP51:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP51]]) +// CHECK9-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 +// CHECK9-NEXT: [[TMP52:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK9-NEXT: call void @llvm.stackrestore(ptr [[TMP52]]) +// CHECK9-NEXT: [[TMP53:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK9-NEXT: ret i32 [[TMP53]] +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83 +// CHECK9-SAME: (i64 noundef [[N:%.*]], i64 noundef [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 +// 
CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined, ptr [[N_ADDR]], ptr [[M_ADDR]], i64 [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// 
CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 +// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 
[[CONV]], [[CONV8]] +// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK9-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK9: land.lhs.true: +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] +// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9: omp.precond.then: +// CHECK9-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9: cond.true: +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: br label [[COND_END:%.*]] +// CHECK9: cond.false: +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// 
CHECK9-NEXT: br label [[COND_END]] +// CHECK9: cond.end: +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK9: omp.inner.for.cond: +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] +// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9: omp.inner.for.body: +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined.omp_outlined, i64 [[TMP21]], i64 [[TMP22]], ptr [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], i64 [[TMP3]], ptr [[TMP4]]) +// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK9: omp.inner.for.inc: +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK9: omp.inner.for.end: +// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK9: omp.loop.exit: +// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP26]]) +// CHECK9-NEXT: br label 
[[OMP_PRECOND_END]] +// CHECK9: omp.precond.end: +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined.omp_outlined +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 +// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 +// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] +// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK9-NEXT: store i32 0, ptr [[J]], align 4 +// 
CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK9: land.lhs.true: +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] +// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9: omp.precond.then: +// CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2]], i32 [[TMP15]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9: cond.true: +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: br label [[COND_END:%.*]] +// CHECK9: cond.false: +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// 
CHECK9-NEXT: br label [[COND_END]] +// CHECK9: cond.end: +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] +// CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP20]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK9: omp.inner.for.cond: +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9: omp.inner.for.body: +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK9-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] +// CHECK9-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 +// CHECK9-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP23]], [[CONV18]] +// CHECK9-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] +// CHECK9-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK9-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK9-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 +// CHECK9-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] +// CHECK9-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 +// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP26]], [[CONV25]] +// CHECK9-NEXT: [[TMP28:%.*]] 
= load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 +// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] +// CHECK9-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 +// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] +// CHECK9-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP25]], [[MUL31]] +// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 +// CHECK9-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] +// CHECK9-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 +// CHECK9-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[I11]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK9-NEXT: [[TMP30:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP3]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP30]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[J12]], align 4 +// CHECK9-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK9-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM36]] +// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX37]], align 4 +// CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK9: omp.body.continue: +// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK9: omp.inner.for.inc: +// CHECK9-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP32]], 1 +// CHECK9-NEXT: store i64 [[ADD38]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK9: omp.inner.for.end: +// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK9: omp.loop.exit: +// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP34]]) +// CHECK9-NEXT: br label 
[[OMP_PRECOND_END]] +// CHECK9: omp.precond.end: +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_ +// CHECK9-SAME: (i32 noundef signext [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[A:%.*]] = alloca [10 x [2 x i32]], align 4 +// CHECK9-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK9-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK9-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 +// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK9-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[A]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[A]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 2, ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], 
i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr @.offload_sizes.1, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK9-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 20, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK9-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK9-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP15]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK9-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK9-NEXT: store i32 0, ptr [[TMP17]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = call i32 
@__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK9-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9: omp_offload.failed: +// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69(ptr [[A]]) #[[ATTR3]] +// CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK9: omp_offload.cont: +// CHECK9-NEXT: ret i32 0 +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69 +// CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.omp_outlined, ptr [[TMP0]]) +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.omp_outlined +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 19, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9: cond.true: +// CHECK9-NEXT: br label [[COND_END:%.*]] +// CHECK9: cond.false: +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: br label [[COND_END]] +// CHECK9: cond.end: +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK9: omp.inner.for.cond: +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9: omp.inner.for.body: +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK9: omp.inner.for.inc: +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK9: omp.inner.for.end: +// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK9: omp.loop.exit: +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.omp_outlined.omp_outlined +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// 
CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 +// CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9: cond.true: +// CHECK9-NEXT: br label [[COND_END:%.*]] +// CHECK9: cond.false: +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: br label [[COND_END]] +// CHECK9: cond.end: +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], 
[[COND_FALSE]] ] +// CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK9: omp.inner.for.cond: +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9: omp.inner.for.body: +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 2 +// CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 2 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK9-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[J]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 +// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] +// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX9]], align 4 +// CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK9: 
omp.body.continue: +// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK9: omp.inner.for.inc: +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK9: omp.inner.for.end: +// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK9: omp.loop.exit: +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK9-SAME: () #[[ATTR6:[0-9]+]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK9-NEXT: ret void +// +// +// CHECK11-LABEL: define {{[^@]+}}@main +// CHECK11-SAME: (i32 noundef [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[M:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[M_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x ptr], align 4 +// CHECK11-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x ptr], align 4 +// CHECK11-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 4 +// CHECK11-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: 
[[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK11-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// CHECK11-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 4 +// CHECK11-NEXT: store i32 100, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 2, ptr [[M]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[M]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave() +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = mul nuw i32 [[TMP0]], [[TMP1]] +// CHECK11-NEXT: [[VLA:%.*]] = alloca i32, i32 [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[__VLA_EXPR0]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[__VLA_EXPR1]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[M_CASTED]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[M_CASTED]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = mul nuw i32 [[TMP0]], [[TMP1]] +// CHECK11-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4 +// CHECK11-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes, i32 40, i1 false) +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// 
CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK11-NEXT: store ptr null, ptr [[TMP16]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP18]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr null, ptr [[TMP19]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP20]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP21]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr null, ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], 
i32 0, i32 4 +// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK11-NEXT: store i64 [[TMP10]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr null, ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP30]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP34]], 1 +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 2, ptr [[TMP35]], 
align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 5, ptr [[TMP36]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP27]], ptr [[TMP37]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP28]], ptr [[TMP38]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP29]], ptr [[TMP39]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP40]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: store ptr null, ptr [[TMP41]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK11-NEXT: store ptr null, ptr [[TMP42]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 [[ADD]], ptr [[TMP43]], align 8 +// CHECK11-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK11-NEXT: store i64 0, ptr [[TMP44]], align 8 +// CHECK11-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP45]], align 4 +// CHECK11-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 
0, i32 11 +// CHECK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP46]], align 4 +// CHECK11-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK11-NEXT: store i32 0, ptr [[TMP47]], align 4 +// CHECK11-NEXT: [[TMP48:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 +// CHECK11-NEXT: br i1 [[TMP49]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11: omp_offload.failed: +// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83(i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP0]], i32 [[TMP1]], ptr [[VLA]]) #[[ATTR3:[0-9]+]] +// CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK11: omp_offload.cont: +// CHECK11-NEXT: [[TMP50:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP50]]) +// CHECK11-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 +// CHECK11-NEXT: [[TMP51:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 +// CHECK11-NEXT: call void @llvm.stackrestore(ptr [[TMP51]]) +// CHECK11-NEXT: [[TMP52:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK11-NEXT: ret i32 [[TMP52]] +// +// +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83 +// CHECK11-SAME: (i32 noundef [[N:%.*]], i32 noundef [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: store i32 [[N]], 
ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 +// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined, ptr [[N_ADDR]], ptr [[M_ADDR]], i32 [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK11-NEXT: ret void +// +// +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// 
CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 +// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 +// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK11-NEXT: 
[[CONV8:%.*]] = sext i32 [[DIV7]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] +// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK11-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK11: land.lhs.true: +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] +// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11: omp.precond.then: +// CHECK11-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11: cond.true: +// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: br label 
[[COND_END:%.*]] +// CHECK11: cond.false: +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: br label [[COND_END]] +// CHECK11: cond.end: +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK11: omp.inner.for.cond: +// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11: omp.inner.for.body: +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i32 +// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = trunc i64 [[TMP23]] to i32 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined.omp_outlined, i32 [[TMP22]], i32 [[TMP24]], ptr [[TMP0]], ptr [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], ptr [[TMP4]]) +// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK11: omp.inner.for.inc: +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK11: omp.inner.for.end: +// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK11: omp.loop.exit: +// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP28]]) +// CHECK11-NEXT: br label [[OMP_PRECOND_END]] +// CHECK11: omp.precond.end: +// CHECK11-NEXT: ret void +// +// +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.omp_outlined.omp_outlined +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: 
[[M_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I13:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J14:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 +// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 4 
+// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 +// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] +// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK11-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK11: land.lhs.true: +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] +// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11: omp.precond.then: +// CHECK11-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK11-NEXT: [[CONV11:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], 
align 4 +// CHECK11-NEXT: [[CONV12:%.*]] = zext i32 [[TMP13]] to i64 +// CHECK11-NEXT: store i64 [[CONV11]], ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[CONV12]], ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2]], i32 [[TMP15]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11: cond.true: +// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: br label [[COND_END:%.*]] +// CHECK11: cond.false: +// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: br label [[COND_END]] +// CHECK11: cond.end: +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] +// CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP20]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK11: omp.inner.for.cond: +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11: 
omp.inner.for.body: +// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK11-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK11-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 +// CHECK11-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] +// CHECK11-NEXT: [[CONV20:%.*]] = sext i32 [[MUL19]] to i64 +// CHECK11-NEXT: [[DIV21:%.*]] = sdiv i64 [[TMP23]], [[CONV20]] +// CHECK11-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]] +// CHECK11-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK11-NEXT: store i32 [[CONV23]], ptr [[I13]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK11-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK11-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 +// CHECK11-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] +// CHECK11-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 +// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP26]], [[CONV27]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK11-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK11-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 +// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] +// CHECK11-NEXT: [[CONV32:%.*]] = sext i32 [[MUL31]] to i64 +// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[DIV28]], [[CONV32]] +// CHECK11-NEXT: [[SUB34:%.*]] = sub nsw i64 [[TMP25]], [[MUL33]] +// CHECK11-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 +// CHECK11-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] +// CHECK11-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 +// CHECK11-NEXT: store i32 [[CONV37]], ptr [[J14]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[I13]], align 4 +// 
CHECK11-NEXT: [[TMP30:%.*]] = mul nsw i32 [[TMP29]], [[TMP3]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[J14]], align 4 +// CHECK11-NEXT: [[ARRAYIDX38:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP31]] +// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX38]], align 4 +// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK11: omp.body.continue: +// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK11: omp.inner.for.inc: +// CHECK11-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP32]], 1 +// CHECK11-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK11: omp.inner.for.end: +// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK11: omp.loop.exit: +// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP34]]) +// CHECK11-NEXT: br label [[OMP_PRECOND_END]] +// CHECK11: omp.precond.end: +// CHECK11-NEXT: ret void +// +// +// CHECK11-LABEL: define {{[^@]+}}@_Z5tmainIiLi10ELi2EEiT_ +// CHECK11-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca [10 x [2 x i32]], align 4 +// CHECK11-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK11-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK11-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK11-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], 
align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[A]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[A]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 2, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr @.offload_sizes.1, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: store ptr null, ptr 
[[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK11-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 20, ptr [[TMP13]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK11-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK11-NEXT: store i32 0, ptr [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK11-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11: omp_offload.failed: +// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69(ptr [[A]]) #[[ATTR3]] +// CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK11: omp_offload.cont: +// CHECK11-NEXT: ret i32 0 +// +// +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69 +// CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-NEXT: 
entry: +// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.omp_outlined, ptr [[TMP0]]) +// CHECK11-NEXT: ret void +// +// +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.omp_outlined +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 19, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// 
CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11: cond.true: +// CHECK11-NEXT: br label [[COND_END:%.*]] +// CHECK11: cond.false: +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: br label [[COND_END]] +// CHECK11: cond.end: +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK11: omp.inner.for.cond: +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11: omp.inner.for.body: +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.omp_outlined.omp_outlined, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK11: omp.inner.for.inc: +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK11: omp.inner.for.end: +// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK11: omp.loop.exit: +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK11-NEXT: ret void +// +// +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.omp_outlined.omp_outlined +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I:%.*]] = alloca 
i32, align 4 +// CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 +// CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11: cond.true: +// CHECK11-NEXT: br label [[COND_END:%.*]] +// CHECK11: cond.false: +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: br label [[COND_END]] +// CHECK11: cond.end: +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// 
CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK11: omp.inner.for.cond: +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11: omp.inner.for.body: +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 +// CHECK11-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] +// CHECK11-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] +// CHECK11-NEXT: store i32 [[ADD6]], ptr [[J]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP13]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] +// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4 +// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK11: omp.body.continue: +// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK11: omp.inner.for.inc: +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// 
CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK11: omp.inner.for.end: +// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK11: omp.loop.exit: +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK11-NEXT: ret void +// +// +// CHECK11-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK11-SAME: () #[[ATTR6:[0-9]+]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK11-NEXT: ret void +// diff --git a/clang/test/OpenMP/teams_generic_loop_private_codegen.cpp b/clang/test/OpenMP/teams_generic_loop_private_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/teams_generic_loop_private_codegen.cpp @@ -0,0 +1,1918 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix=CHECK1 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix=CHECK1 +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix=CHECK3 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ 
-std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix=CHECK3 + +// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" + +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix=CHECK9 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown 
-fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix=CHECK9 + +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +struct St { + int a, b; + St() : a(0), b(0) {} + St(const St &st) : a(st.a + st.b), b(0) {} + ~St() {} +}; + +volatile int g = 1212; +volatile int &g1 = g; + +template <class T> +struct S { + T f; + S(T a) : f(a + g) {} + S() : f(g) {} + S(const S &s, St t = St()) : f(s.f + t.a) {} + operator T() { return T(); } + ~S() {} +}; + + +template <typename T> +T tmain() { + S<T> test; + T t_var = T(); + T vec[] = {1, 2}; + S<T> s_arr[] = {1, 2}; + S<T> &var = test; +#pragma omp target +#pragma omp teams loop private(t_var, vec, s_arr, var) + for (int i = 0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + } + return T(); +} + +S<float> test; +int t_var = 333; +int vec[] = {1, 2}; +S<float> s_arr[] = {1, 2}; +S<float> var(3); + +int main() { + static int sivar; +#ifdef LAMBDA + [&]() { +#pragma omp target +#pragma omp teams loop private(g, g1, sivar) + for (int i = 0; i < 2; ++i) { + + // Skip global, bound tid and loop vars + + g = 1; + g1 = 1; + sivar = 2; + + // Skip global, bound tid and loop vars + [&]() { + g = 2; + g1 = 2; + sivar = 4; + + }(); + } + }(); + return 0; +#else +#pragma omp target +#pragma omp teams loop private(t_var, vec, s_arr, var, sivar) + for (int i =
0; i < 2; ++i) { + vec[i] = t_var; + s_arr[i] = var; + sivar += i; + } + return tmain<int>(); +#endif +} + + + +// Skip global, bound tid and loop vars + +// private(s_arr) + +// private(var) + + +// Skip global, bound tid and loop vars + +// private(s_arr) + +// private(var) + + + + +// Skip global, bound tid and loop vars + +// private(s_arr) + + +// private(var) + + +// Skip global, bound tid and loop vars +// prev lb and ub +// iter variables + +// private(s_arr) + + +// private(var) + + + +#endif +// CHECK1-LABEL: define {{[^@]+}}@__cxx_global_var_init +// CHECK1-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) @test) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN1SIfED1Ev, ptr @test, ptr @__dso_handle) #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIfEC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfED1Ev +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIfED2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev +// CHECK1-SAME: (ptr
noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float +// CHECK1-NEXT: store float [[CONV]], ptr [[F]], align 4 +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfED2Ev +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@__cxx_global_var_init.1 +// CHECK1-SAME: () #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) @s_arr, float noundef 1.000000e+00) +// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) getelementptr inbounds ([[STRUCT_S:%.*]], ptr @s_arr, i64 1), float noundef 2.000000e+00) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @__cxx_global_array_dtor, ptr null, ptr @__dso_handle) #[[ATTR2]] +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ef +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], float noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// CHECK1-NEXT: store ptr [[THIS]], ptr 
[[THIS_ADDR]], align 8 +// CHECK1-NEXT: store float [[A]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: call void @_ZN1SIfEC2Ef(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]], float noundef [[TMP0]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@__cxx_global_array_dtor +// CHECK1-SAME: (ptr noundef [[TMP0:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK1: arraydestroy.body: +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ getelementptr inbounds ([[STRUCT_S:%.*]], ptr @s_arr, i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], @s_arr +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done1: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ef +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], float noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store float [[A]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr 
inbounds [[STRUCT_S:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to float +// CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP0]], [[CONV]] +// CHECK1-NEXT: store float [[ADD]], ptr [[F]], align 4 +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@__cxx_global_var_init.2 +// CHECK1-SAME: () #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) @var, float noundef 3.000000e+00) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN1SIfED1Ev, ptr @var, ptr @__dso_handle) #[[ATTR2]] +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@main +// CHECK1-SAME: () #[[ATTR3:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 2, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 0, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr null, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: 
store ptr null, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 2, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, ptr [[TMP12]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1: omp_offload.failed: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96() #[[ATTR2]] +// CHECK1-NEXT: br 
label [[OMP_OFFLOAD_CONT]] +// CHECK1: omp_offload.cont: +// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiET_v() +// CHECK1-NEXT: ret i32 [[CALL]] +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96 +// CHECK1-SAME: () #[[ATTR4:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.omp_outlined) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 
+// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] +// CHECK1: arrayctor.loop: +// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] +// CHECK1: arrayctor.cont: +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 
[[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1: omp.inner.for.cond.cleanup: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]]) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// 
CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 +// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK1: arraydestroy.body: +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done3: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.omp_outlined.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = 
alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] +// CHECK1: arrayctor.loop: +// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] +// CHECK1: arrayctor.cont: +// CHECK1-NEXT: call void 
@_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1: omp.inner.for.cond.cleanup: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 
+// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM3]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4 +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]]) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK1: arraydestroy.body: +// CHECK1-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done8: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_Z5tmainIiET_v +// CHECK1-SAME: () #[[ATTR6:[0-9]+]] comdat { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: call void 
@_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) +// CHECK1-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 +// CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 2, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 0, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr null, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 2, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, ptr [[TMP9]], align 8 
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, ptr [[TMP12]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1: omp_offload.failed: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56() #[[ATTR2]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK1: omp_offload.cont: +// CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK1: arraydestroy.body: +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// 
CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done2: +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP16]] +// +// +// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: call void @_ZN1SIiEC2Ei(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef signext [[TMP0]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 +// CHECK1-SAME: () #[[ATTR4]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.omp_outlined) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: br 
label [[ARRAYCTOR_LOOP:%.*]] +// CHECK1: arrayctor.loop: +// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] +// CHECK1: arrayctor.cont: +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1: omp.inner.for.cond.cleanup: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP14]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i64 2 +// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK1: arraydestroy.body: +// 
CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done5: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.omp_outlined.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[I:%.*]] = 
alloca i32, align 4 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] +// CHECK1: arrayctor.loop: +// CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] +// CHECK1: arrayctor.cont: +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 
dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1: omp.inner.for.cond.cleanup: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr 
[[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP12]], i64 4, i1 false) +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK1: arraydestroy.body: +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: 
[[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done9: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiED2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef signext [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, 
align 4 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[F]], align 4 +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_teams_generic_loop_private_codegen.cpp +// CHECK1-SAME: () #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void @__cxx_global_var_init() +// CHECK1-NEXT: call void @__cxx_global_var_init.1() +// CHECK1-NEXT: call void @__cxx_global_var_init.2() +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK1-SAME: () #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK1-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@__cxx_global_var_init +// CHECK3-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) @test) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN1SIfED1Ev, ptr @test, ptr @__dso_handle) #[[ATTR2:[0-9]+]] +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev +// CHECK3-SAME: (ptr 
noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: call void @_ZN1SIfEC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIfED1Ev +// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: call void @_ZN1SIfED2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev +// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float +// CHECK3-NEXT: store float [[CONV]], ptr [[F]], align 4 +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIfED2Ev +// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// 
CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@__cxx_global_var_init.1 +// CHECK3-SAME: () #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) @s_arr, float noundef 1.000000e+00) +// CHECK3-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) getelementptr inbounds ([[STRUCT_S:%.*]], ptr @s_arr, i32 1), float noundef 2.000000e+00) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @__cxx_global_array_dtor, ptr null, ptr @__dso_handle) #[[ATTR2]] +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ef +// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], float noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store float [[A]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: call void @_ZN1SIfEC2Ef(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]], float noundef [[TMP0]]) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@__cxx_global_array_dtor +// CHECK3-SAME: (ptr noundef [[TMP0:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 +// CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK3: arraydestroy.body: +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ getelementptr inbounds ([[STRUCT_S:%.*]], ptr @s_arr, i32 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: 
[[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], @s_arr +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done1: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ef +// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], float noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store float [[A]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to float +// CHECK3-NEXT: [[ADD:%.*]] = fadd float [[TMP0]], [[CONV]] +// CHECK3-NEXT: store float [[ADD]], ptr [[F]], align 4 +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@__cxx_global_var_init.2 +// CHECK3-SAME: () #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) @var, float noundef 3.000000e+00) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN1SIfED1Ev, ptr @var, ptr @__dso_handle) #[[ATTR2]] +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@main +// CHECK3-SAME: () #[[ATTR3:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, 
align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 2, ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 0, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr null, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr null, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 2, ptr [[TMP8]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK3-NEXT: store i64 0, ptr [[TMP9]], align 8 +// 
CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK3-NEXT: store i32 0, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK3-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3: omp_offload.failed: +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96() #[[ATTR2]] +// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK3: omp_offload.cont: +// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() +// CHECK3-NEXT: ret i32 [[CALL]] +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96 +// CHECK3-SAME: () #[[ATTR4:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.omp_outlined) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] +// CHECK3: arrayctor.loop: +// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] 
], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] +// CHECK3: arrayctor.cont: +// CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3: omp.inner.for.cond.cleanup: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.omp_outlined.omp_outlined, i32 [[TMP7]], i32 [[TMP8]]) +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]]) +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2 +// CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK3: arraydestroy.body: +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr 
noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done3: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96.omp_outlined.omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store 
i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] +// CHECK3: arrayctor.loop: +// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] +// CHECK3: arrayctor.cont: +// CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// 
CHECK3: cond.true: +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3: omp.inner.for.cond.cleanup: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] 
= add nsw i32 [[TMP14]], [[TMP13]] +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4 +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK3: omp.body.continue: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]]) +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 +// CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK3: arraydestroy.body: +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE6:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done6: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_Z5tmainIiET_v +// CHECK3-SAME: () #[[ATTR6:[0-9]+]] comdat { 
+// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) +// CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) +// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK3-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK3-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 +// CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 2, ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 0, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: 
[[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr null, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr null, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 2, ptr [[TMP8]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK3-NEXT: store i64 0, ptr [[TMP9]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK3-NEXT: store i32 0, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.region_id, ptr 
[[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK3-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3: omp_offload.failed: +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56() #[[ATTR2]] +// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK3: omp_offload.cont: +// CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK3: arraydestroy.body: +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done2: +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK3-NEXT: ret i32 [[TMP16]] +// +// +// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev +// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: call void 
@_ZN1SIiEC2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei +// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: call void @_ZN1SIiEC2Ei(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 +// CHECK3-SAME: () #[[ATTR4]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.omp_outlined) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: br 
label [[ARRAYCTOR_LOOP:%.*]] +// CHECK3: arrayctor.loop: +// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] +// CHECK3: arrayctor.cont: +// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3: omp.inner.for.cond.cleanup: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.omp_outlined.omp_outlined, i32 [[TMP7]], i32 [[TMP8]]) +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP12]]) +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i32 2 +// CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK3: arraydestroy.body: +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], 
[[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done5: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56.omp_outlined.omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: 
store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] +// CHECK3: arrayctor.loop: +// CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] +// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) +// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] +// CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] +// CHECK3: arrayctor.cont: +// CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: call 
void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3: omp.inner.for.cond.cleanup: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 4 
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP13]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP12]], i32 4, i1 false) +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK3: omp.body.continue: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]]) +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK3: arraydestroy.body: +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label 
[[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done7: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev +// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: call void @_ZN1SIiED2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev +// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK3-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei +// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: 
[[TMP1:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[F]], align 4 +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev +// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_teams_generic_loop_private_codegen.cpp +// CHECK3-SAME: () #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: call void @__cxx_global_var_init() +// CHECK3-NEXT: call void @__cxx_global_var_init.1() +// CHECK3-NEXT: call void @__cxx_global_var_init.2() +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK3-SAME: () #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK3-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@__cxx_global_var_init +// CHECK9-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) @test) +// CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN1SIfED1Ev, ptr @test, ptr @__dso_handle) #[[ATTR2:[0-9]+]] +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev +// CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC2Ev(ptr noundef nonnull 
align 4 dereferenceable(4) [[THIS1]]) +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@_ZN1SIfED1Ev +// CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfED2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR2]] +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev +// CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float +// CHECK9-NEXT: store float [[CONV]], ptr [[F]], align 4 +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@_ZN1SIfED2Ev +// CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@__cxx_global_var_init.1 +// CHECK9-SAME: () #[[ATTR0]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) @s_arr, float noundef 1.000000e+00) +// CHECK9-NEXT: call void 
@_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) getelementptr inbounds ([[STRUCT_S:%.*]], ptr @s_arr, i64 1), float noundef 2.000000e+00) +// CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @__cxx_global_array_dtor, ptr null, ptr @__dso_handle) #[[ATTR2]] +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ef +// CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], float noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK9-NEXT: store float [[A]], ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: call void @_ZN1SIfEC2Ef(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]], float noundef [[TMP0]]) +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@__cxx_global_array_dtor +// CHECK9-SAME: (ptr noundef [[TMP0:%.*]]) #[[ATTR0]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 +// CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK9: arraydestroy.body: +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ getelementptr inbounds ([[STRUCT_S:%.*]], ptr @s_arr, i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], @s_arr +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label 
[[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done1: +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ef +// CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], float noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca float, align 4 +// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CHECK9-NEXT: store float [[A]], ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP0:%.*]] = load float, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to float +// CHECK9-NEXT: [[ADD:%.*]] = fadd float [[TMP0]], [[CONV]] +// CHECK9-NEXT: store float [[ADD]], ptr [[F]], align 4 +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@__cxx_global_var_init.2 +// CHECK9-SAME: () #[[ATTR0]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) @var, float noundef 3.000000e+00) +// CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN1SIfED1Ev, ptr @var, ptr @__dso_handle) #[[ATTR2]] +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@main +// CHECK9-SAME: () #[[ATTR3:[0-9]+]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 1 +// CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// CHECK9-NEXT: call void @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 1 dereferenceable(1) [[REF_TMP]]) +// CHECK9-NEXT: ret i32 0 +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l75 +// 
CHECK9-SAME: (i64 noundef [[G1:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l75.omp_outlined) +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l75.omp_outlined +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 +// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9: cond.true: +// CHECK9-NEXT: br label [[COND_END:%.*]] +// CHECK9: cond.false: +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: br label [[COND_END]] +// CHECK9: cond.end: +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK9: omp.inner.for.cond: +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9: omp.inner.for.body: +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l75.omp_outlined.omp_outlined, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK9: omp.inner.for.inc: +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK9: omp.inner.for.end: +// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK9: omp.loop.exit: +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l75.omp_outlined.omp_outlined +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR5]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: 
[[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 +// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9: cond.true: +// CHECK9-NEXT: br label [[COND_END:%.*]] +// CHECK9: cond.false: +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: br label [[COND_END]] +// CHECK9: cond.end: +// CHECK9-NEXT: [[COND:%.*]] = phi i32 
[ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK9: omp.inner.for.cond: +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9: omp.inner.for.body: +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK9-NEXT: store i32 1, ptr [[G]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: store volatile i32 1, ptr [[TMP10]], align 4 +// CHECK9-NEXT: store i32 2, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[G]], ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 8 +// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) +// CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK9: omp.body.continue: +// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK9: omp.inner.for.inc: +// CHECK9-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK9: omp.inner.for.end: +// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK9: omp.loop.exit: +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_teams_generic_loop_private_codegen.cpp +// CHECK9-SAME: () #[[ATTR0]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: call void @__cxx_global_var_init() +// CHECK9-NEXT: call void @__cxx_global_var_init.1() +// CHECK9-NEXT: call void @__cxx_global_var_init.2() +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK9-SAME: () #[[ATTR0]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK9-NEXT: ret void +// diff --git a/clang/test/OpenMP/teams_generic_loop_reduction_codegen.cpp b/clang/test/OpenMP/teams_generic_loop_reduction_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/teams_generic_loop_reduction_codegen.cpp @@ -0,0 +1,1524 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 
-include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK1 +// RUN: %clang_cc1 -DCHECK -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK3 +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK3 + +// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCHECK -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCHECK -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" + +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK9 +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x 
c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK9 + +// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DLAMBDA -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +template <typename T> +T tmain() { + T t_var = T(); + T vec[] = {1, 2}; +#pragma omp target +#pragma omp teams loop reduction(+: t_var) + for (int i = 0; i < 2; ++i) { + t_var += (T) i; + } + return T(); +} + +int main() { + static int sivar; +#ifdef LAMBDA + + [&]() { +#pragma omp target +#pragma omp teams loop reduction(+: sivar) + for (int i = 0; i < 2; ++i) { + + // Skip global and bound tid vars + + + + // Skip global and bound tid vars, and prev lb and ub vars + // skip loop vars + + + sivar += i; + + [&]() { + + sivar += 4; + + }(); + } + }(); + return 0; +#else +#pragma omp target +#pragma omp teams loop reduction(+: sivar) + for (int i = 0; i < 2; ++i) { + sivar += i; + } + return tmain<int>(); +#endif +} + + + + +// Skip global and bound tid vars + + +// Skip global and bound tid vars, and prev lb and ub +// skip loop vars + + + + +// Skip global and bound tid vars + + +// Skip global and bound tid vars, and prev lb and ub vars +// skip loop vars + +#endif
+// CHECK1-LABEL: define {{[^@]+}}@main +// CHECK1-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[SIVAR_CASTED]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 2, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 2, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP17]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP18]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, ptr [[TMP19]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = call i32 
@__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1: omp_offload.failed: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68(i64 [[TMP1]]) #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK1: omp_offload.cont: +// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiET_v() +// CHECK1-NEXT: ret i32 [[CALL]] +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 +// CHECK1-SAME: (i64 noundef [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined, ptr [[SIVAR_ADDR]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP6:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], ptr [[SIVAR1]]) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// 
CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// CHECK1-NEXT: ] +// CHECK1: .omp.reduction.case1: +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK1: .omp.reduction.case2: +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK1: .omp.reduction.default: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: 
[[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[SIVAR2]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: 
br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR2]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[SIVAR2]], align 4 +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[SIVAR2]], ptr 
[[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// CHECK1-NEXT: ] +// CHECK1: .omp.reduction.case1: +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR2]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK1: .omp.reduction.case2: +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR2]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK1: .omp.reduction.default: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp_outlined.omp.reduction.reduction_func +// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr 
inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp.reduction.reduction_func +// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_Z5tmainIiET_v +// CHECK1-SAME: () #[[ATTR5:[0-9]+]] comdat { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: 
[[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 2, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr 
[[TMP5]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.1, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 2, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP17]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP18]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, ptr [[TMP19]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, 
i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1: omp_offload.failed: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(i64 [[TMP1]]) #[[ATTR2]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK1: omp_offload.cont: +// CHECK1-NEXT: ret i32 0 +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32 +// CHECK1-SAME: (i64 noundef [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined, ptr [[T_VAR_ADDR]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, 
align 4 +// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// 
CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], ptr [[T_VAR1]]) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// CHECK1-NEXT: ] +// CHECK1: .omp.reduction.case1: +// 
CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK1: .omp.reduction.case2: +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK1: .omp.reduction.default: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], 
align 8 +// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR2]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] 
], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[T_VAR2]], align 4 +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR2]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 
[[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// CHECK1-NEXT: ] +// CHECK1: .omp.reduction.case1: +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK1: .omp.reduction.case2: +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK1: .omp.reduction.default: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined.omp.reduction.reduction_func +// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr 
[[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp.reduction.reduction_func +// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK1-SAME: () #[[ATTR7:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK1-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@main +// CHECK3-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK3-NEXT: entry: +// 
CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4 +// CHECK3-NEXT: store i32 [[TMP0]], ptr [[SIVAR_CASTED]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 2, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: 
[[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 2, ptr [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK3-NEXT: store i64 0, ptr [[TMP16]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP18]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK3-NEXT: store i32 0, ptr [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr 
@.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK3-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3: omp_offload.failed: +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68(i32 [[TMP1]]) #[[ATTR2:[0-9]+]] +// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK3: omp_offload.cont: +// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() +// CHECK3-NEXT: ret i32 [[CALL]] +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 +// CHECK3-SAME: (i32 noundef [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined, ptr [[SIVAR_ADDR]]) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: 
[[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp_outlined, i32 [[TMP8]], i32 [[TMP9]], ptr [[SIVAR1]]) +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// CHECK3-NEXT: ] +// CHECK3: .omp.reduction.case1: +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] 
= load i32, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK3: .omp.reduction.case2: +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 +// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK3: .omp.reduction.default: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// 
CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK3: omp.body.continue: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// 
CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// CHECK3-NEXT: ] +// CHECK3: .omp.reduction.case1: +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK3: .omp.reduction.case2: +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK3: .omp.reduction.default: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp_outlined.omp.reduction.reduction_func +// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr 
[[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.omp_outlined.omp.reduction.reduction_func +// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_Z5tmainIiET_v +// CHECK3-SAME: () #[[ATTR5:[0-9]+]] comdat { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// 
CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) +// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 2, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.1, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 2, ptr [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK3-NEXT: store i64 0, ptr [[TMP16]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP18]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK3-NEXT: store i32 0, ptr [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK3-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3: omp_offload.failed: +// CHECK3-NEXT: 
call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(i32 [[TMP1]]) #[[ATTR2]] +// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK3: omp_offload.cont: +// CHECK3-NEXT: ret i32 0 +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32 +// CHECK3-SAME: (i32 noundef [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined, ptr [[T_VAR_ADDR]]) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], 
align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined, i32 [[TMP8]], i32 [[TMP9]], ptr [[T_VAR1]]) +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// CHECK3-NEXT: ] +// CHECK3: .omp.reduction.case1: +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: br label 
[[DOTOMP_REDUCTION_DEFAULT]] +// CHECK3: .omp.reduction.case2: +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 +// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK3: .omp.reduction.default: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], 
align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// 
CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK3: omp.body.continue: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// CHECK3-NEXT: ] +// CHECK3: .omp.reduction.case1: +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[TMP0]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK3: .omp.reduction.case2: +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK3: .omp.reduction.default: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp_outlined.omp.reduction.reduction_func +// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.omp_outlined.omp.reduction.reduction_func +// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR3]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK3-SAME: () #[[ATTR7:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK3-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@main +// CHECK9-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 1 +// CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// CHECK9-NEXT: call void @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 1 dereferenceable(1) [[REF_TMP]]) +// CHECK9-NEXT: ret i32 0 +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45 +// CHECK9-SAME: (i64 noundef [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { +// 
CHECK9-NEXT: entry: +// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45.omp_outlined, ptr [[SIVAR_ADDR]]) +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45.omp_outlined +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 +// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK9-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 
+// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9: cond.true: +// CHECK9-NEXT: br label [[COND_END:%.*]] +// CHECK9: cond.false: +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: br label [[COND_END]] +// CHECK9: cond.end: +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK9: omp.inner.for.cond: +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9: omp.inner.for.body: +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45.omp_outlined.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], ptr [[SIVAR1]]) +// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK9: omp.inner.for.inc: +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK9: omp.inner.for.end: +// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK9: omp.loop.exit: +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45.omp_outlined.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// CHECK9-NEXT: ] +// CHECK9: .omp.reduction.case1: +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK9: .omp.reduction.case2: +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, 
ptr [[SIVAR1]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK9: .omp.reduction.default: +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45.omp_outlined.omp_outlined +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 +// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load 
ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: store i32 0, ptr [[SIVAR2]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9: cond.true: +// CHECK9-NEXT: br label [[COND_END:%.*]] +// CHECK9: cond.false: +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: br label [[COND_END]] +// CHECK9: cond.end: +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK9: omp.inner.for.cond: +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 
4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9: omp.inner.for.body: +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR2]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK9-NEXT: store i32 [[ADD4]], ptr [[SIVAR2]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[SIVAR2]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]) +// CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK9: omp.body.continue: +// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK9: omp.inner.for.inc: +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK9: omp.inner.for.end: +// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK9: omp.loop.exit: +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: store ptr [[SIVAR2]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45.omp_outlined.omp_outlined.omp.reduction.reduction_func, 
ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] +// CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] +// CHECK9-NEXT: ] +// CHECK9: .omp.reduction.case1: +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR2]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK9: .omp.reduction.case2: +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR2]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP19]] monotonic, align 4 +// CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] +// CHECK9: .omp.reduction.default: +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45.omp_outlined.omp_outlined.omp.reduction.reduction_func +// CHECK9-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// 
CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l45.omp_outlined.omp.reduction.reduction_func +// CHECK9-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR4]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK9-NEXT: ret void +// +// +// CHECK9-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK9-SAME: () #[[ATTR6:[0-9]+]] { +// CHECK9-NEXT: entry: +// CHECK9-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK9-NEXT: ret void +//