Index: clang/lib/CodeGen/CGOpenMPRuntimeGPU.h =================================================================== --- clang/lib/CodeGen/CGOpenMPRuntimeGPU.h +++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.h @@ -153,12 +153,6 @@ /// Constant for NVPTX for better optimization. bool isDefaultLocationConstant() const override { return true; } - /// Returns additional flags that can be stored in reserved_2 field of the - /// default location. - /// For NVPTX target contains data about SPMD/Non-SPMD execution mode + - /// Full/Lightweight runtime mode. Used for better optimization. - unsigned getDefaultLocationReserved2Flags() const override; - public: explicit CGOpenMPRuntimeGPU(CodeGenModule &CGM); void clear() override; @@ -380,14 +374,9 @@ /// to emit optimized code. ExecutionMode CurrentExecutionMode = EM_Unknown; - /// true if we're emitting the code for the target region and next parallel - /// region is L0 for sure. - bool IsInTargetMasterThreadRegion = false; /// true if currently emitting code for target/teams/distribute region, false /// - otherwise. bool IsInTTDRegion = false; - /// true if we're definitely in the parallel region. - bool IsInParallelRegion = false; /// Map between an outlined function and its wrapper. llvm::DenseMap WrapperFunctionsMap; @@ -412,12 +401,10 @@ using EscapedParamsTy = llvm::SmallPtrSet; struct FunctionData { DeclToAddrMapTy LocalVarData; - llvm::Optional SecondaryLocalVarData = std::nullopt; EscapedParamsTy EscapedParameters; llvm::SmallVector EscapedVariableLengthDecls; llvm::SmallVector, 4> EscapedVariableLengthDeclsAddrs; - llvm::Value *IsInSPMDModeFlag = nullptr; std::unique_ptr MappedParams; }; /// Maps the function to the list of the globalized variables with their @@ -429,9 +416,6 @@ /// reductions. /// All the records are gathered into a union `union.type` is created. llvm::SmallVector TeamsReductions; - /// Shared pointer for the global memory in the global memory buffer used for - /// the given kernel. - llvm::GlobalVariable *KernelStaticGlobalized = nullptr; /// Pair of the Non-SPMD team and all reductions variables in this team /// region. std::pair> Index: clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp =================================================================== --- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -94,9 +94,6 @@ /// Global memory alignment for performance. GlobalMemoryAlignment = 128, - - /// Maximal size of the shared memory buffer. - SharedMemorySize = 128, }; static const ValueDecl *getPrivateItem(const Expr *RefExpr) { @@ -771,8 +768,7 @@ void CGOpenMPRuntimeGPU::emitKernelInit(CodeGenFunction &CGF, EntryFunctionState &EST, bool IsSPMD) { CGBuilderTy &Bld = CGF.Builder; - Bld.restoreIP(OMPBuilder.createTargetInit(Bld, IsSPMD, true)); - IsInTargetMasterThreadRegion = IsSPMD; + Bld.restoreIP(OMPBuilder.createTargetInit(Bld, IsSPMD)); if (!IsSPMD) emitGenericVarsProlog(CGF, EST.Loc); } @@ -784,7 +780,7 @@ emitGenericVarsEpilog(CGF); CGBuilderTy &Bld = CGF.Builder; - OMPBuilder.createTargetDeinit(Bld, IsSPMD, true); + OMPBuilder.createTargetDeinit(Bld, IsSPMD); } void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D, @@ -876,18 +872,6 @@ (~KMP_IDENT_SPMD_MODE) & KMP_IDENT_SIMPLE_RT_MODE; } // anonymous namespace -unsigned CGOpenMPRuntimeGPU::getDefaultLocationReserved2Flags() const { - switch (getExecutionMode()) { - case EM_SPMD: - return KMP_IDENT_SPMD_MODE & (~KMP_IDENT_SIMPLE_RT_MODE); - case EM_NonSPMD: - return (~KMP_IDENT_SPMD_MODE) & (~KMP_IDENT_SIMPLE_RT_MODE); - case EM_Unknown: - return UndefinedMode; - } - llvm_unreachable("Unknown flags are requested."); -} - CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM) : CGOpenMPRuntime(CGM) { llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsDevice, true, @@ -939,33 +923,13 @@ const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { // Emit target region as a standalone region. - class NVPTXPrePostActionTy : public PrePostActionTy { - bool &IsInParallelRegion; - bool PrevIsInParallelRegion; - - public: - NVPTXPrePostActionTy(bool &IsInParallelRegion) - : IsInParallelRegion(IsInParallelRegion) {} - void Enter(CodeGenFunction &CGF) override { - PrevIsInParallelRegion = IsInParallelRegion; - IsInParallelRegion = true; - } - void Exit(CodeGenFunction &CGF) override { - IsInParallelRegion = PrevIsInParallelRegion; - } - } Action(IsInParallelRegion); - CodeGen.setAction(Action); bool PrevIsInTTDRegion = IsInTTDRegion; IsInTTDRegion = false; - bool PrevIsInTargetMasterThreadRegion = IsInTargetMasterThreadRegion; - IsInTargetMasterThreadRegion = false; auto *OutlinedFun = cast(CGOpenMPRuntime::emitParallelOutlinedFunction( D, ThreadIDVar, InnermostKind, CodeGen)); - IsInTargetMasterThreadRegion = PrevIsInTargetMasterThreadRegion; IsInTTDRegion = PrevIsInTTDRegion; - if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD && - !IsInParallelRegion) { + if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD) { llvm::Function *WrapperFun = createParallelDataSharingWrapper(OutlinedFun, D); WrapperFunctionsMap[OutlinedFun] = WrapperFun; @@ -3357,16 +3321,6 @@ assert(VD->isCanonicalDecl() && "Expected canonical declaration"); Data.insert(std::make_pair(VD, MappedVarData())); } - if (!IsInTTDRegion && !NeedToDelayGlobalization && !IsInParallelRegion) { - CheckVarsEscapingDeclContext VarChecker(CGF, std::nullopt); - VarChecker.Visit(Body); - I->getSecond().SecondaryLocalVarData.emplace(); - DeclToAddrMapTy &Data = *I->getSecond().SecondaryLocalVarData; - for (const ValueDecl *VD : VarChecker.getEscapedDecls()) { - assert(VD->isCanonicalDecl() && "Expected canonical declaration"); - Data.insert(std::make_pair(VD, MappedVarData())); - } - } if (!NeedToDelayGlobalization) { emitGenericVarsProlog(CGF, D->getBeginLoc(), /*WithSPMDCheck=*/true); struct GlobalizationScope final : EHScopeStack::Cleanup { Index: clang/test/OpenMP/amdgcn_target_codegen.cpp =================================================================== --- clang/test/OpenMP/amdgcn_target_codegen.cpp +++ clang/test/OpenMP/amdgcn_target_codegen.cpp @@ -13,7 +13,7 @@ int arr[N]; -// CHECK: call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i8 1, i1 true, i1 true) +// CHECK: call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i8 1, i1 true) #pragma omp target for (int i = 0; i < N; i++) { arr[i] = 1; @@ -27,7 +27,7 @@ int arr[N]; -// CHECK: call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i8 2, i1 false) #pragma omp target simd for (int i = 0; i < N; i++) { arr[i] = 1; Index: clang/test/OpenMP/declare_target_codegen_globalization.cpp =================================================================== --- clang/test/OpenMP/declare_target_codegen_globalization.cpp +++ clang/test/OpenMP/declare_target_codegen_globalization.cpp @@ -31,15 +31,15 @@ // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -56,8 +56,8 @@ // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooRi(ptr noundef nonnull align 4 dereferenceable(4) [[B]]) #[[ATTR6:[0-9]+]] -// CHECK1-NEXT: [[CALL1:%.*]] = call noundef i32 @_Z3barv() #[[ATTR6]] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooRi(ptr noundef nonnull align 4 dereferenceable(4) [[B]]) #[[ATTR7:[0-9]+]] +// CHECK1-NEXT: [[CALL1:%.*]] = call noundef i32 @_Z3barv() #[[ATTR7]] // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL1]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 // CHECK1-NEXT: ret void @@ -77,7 +77,7 @@ // CHECK1-SAME: () #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) -// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooRi(ptr noundef nonnull align 4 dereferenceable(4) [[A]]) #[[ATTR6]] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooRi(ptr noundef nonnull align 4 dereferenceable(4) [[A]]) #[[ATTR7]] // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[A]], i64 4) // CHECK1-NEXT: ret i32 [[CALL]] // Index: clang/test/OpenMP/nvptx_SPMD_codegen.cpp =================================================================== --- clang/test/OpenMP/nvptx_SPMD_codegen.cpp +++ clang/test/OpenMP/nvptx_SPMD_codegen.cpp @@ -11,31 +11,31 @@ int a; // CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1 -// CHECK-DAG: [[DISTR_FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2050, i32 1, i32 {{[0-9]+}}, i8* getelementptr inbounds -// CHECK-DAG: [[FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 1, i32 {{[0-9]+}}, i8* getelementptr inbounds -// CHECK-DAG: [[BAR_FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 66, i32 1, i32 {{[0-9]+}}, i8* getelementptr inbounds +// CHECK-DAG: [[DISTR_FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2050, i32 0, i32 {{[0-9]+}}, i8* getelementptr inbounds +// CHECK-DAG: [[FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 {{[0-9]+}}, i8* getelementptr inbounds +// CHECK-DAG: [[BAR_FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 66, i32 0, i32 {{[0-9]+}}, i8* getelementptr inbounds // CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1 void foo() { -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] #pragma omp target teams distribute parallel for simd if(a) @@ -60,25 +60,25 @@ for (int i = 0; i < 10; ++i) ; int a; -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] #pragma omp target teams distribute parallel for lastprivate(a) @@ -102,25 +102,25 @@ #pragma omp target teams distribute parallel for schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] // CHECK: call i32 @__kmpc_target_init( // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] #pragma omp target teams @@ -162,25 +162,25 @@ #pragma omp distribute parallel for simd schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] #pragma omp target teams @@ -211,25 +211,25 @@ #pragma omp distribute parallel for schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[DISTR_FULL]] // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[FULL]] #pragma omp target #pragma omp teams @@ -266,19 +266,19 @@ #pragma omp distribute parallel for schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[FULL]] #pragma omp target parallel for if(a) for (int i = 0; i < 10; ++i) @@ -301,25 +301,25 @@ #pragma omp target parallel for schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[FULL]] // CHECK-DAG: [[BAR_FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[FULL]] // CHECK-DAG: [[BAR_FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[FULL]] // CHECK-DAG: [[BAR_FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[FULL]] // CHECK-DAG: [[BAR_FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[FULL]] // CHECK-DAG: [[BAR_FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[FULL]] // CHECK-DAG: [[BAR_FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[FULL]] // CHECK-DAG: [[BAR_FULL]] #pragma omp target parallel if(a) @@ -350,25 +350,25 @@ #pragma omp for simd schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[FULL]] // CHECK-DAG: [[BAR_FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[FULL]] // CHECK-DAG: [[BAR_FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[FULL]] // CHECK-DAG: [[BAR_FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[FULL]] // CHECK-DAG: [[BAR_FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[FULL]] // CHECK-DAG: [[BAR_FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[FULL]] // CHECK-DAG: [[BAR_FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[FULL]] #pragma omp target #pragma omp parallel @@ -405,17 +405,17 @@ #pragma omp for simd schedule(guided) for (int i = 0; i < 10; ++i) ; -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[FULL]] -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-DAG: [[FULL]] #pragma omp target #pragma omp parallel for Index: clang/test/OpenMP/nvptx_data_sharing.cpp =================================================================== --- clang/test/OpenMP/nvptx_data_sharing.cpp +++ clang/test/OpenMP/nvptx_data_sharing.cpp @@ -36,7 +36,7 @@ // CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 // CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [2 x ptr], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK: user_code.entry: @@ -49,14 +49,14 @@ // CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1) // CHECK-NEXT: store i32 100, ptr [[B]], align 4 // CHECK-NEXT: store i32 1000, ptr [[C]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS1]], i64 0, i64 0 -// CHECK-NEXT: store ptr [[B]], ptr [[TMP5]], align 8 -// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS1]], i64 0, i64 1 -// CHECK-NEXT: store ptr [[A]], ptr [[TMP7]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS1]], i64 0, i64 0 +// CHECK-NEXT: store ptr [[B]], ptr [[TMP3]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS1]], i64 0, i64 1 +// CHECK-NEXT: store ptr [[A]], ptr [[TMP4]], align 8 // CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 2) // CHECK-NEXT: call void @__kmpc_free_shared(ptr [[B]], i64 4) // CHECK-NEXT: call void @__kmpc_free_shared(ptr [[A]], i64 4) -// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK-NEXT: ret void // CHECK: worker.exit: // CHECK-NEXT: ret void @@ -89,8 +89,8 @@ // CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 -// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]]) #[[ATTR4:[0-9]+]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR4:[0-9]+]] // CHECK-NEXT: ret void // // @@ -129,9 +129,9 @@ // CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 -// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1 -// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]], ptr [[TMP8]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1 +// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]], ptr [[TMP6]]) #[[ATTR4]] // CHECK-NEXT: ret void // Index: clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp =================================================================== --- clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp +++ clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp @@ -44,18 +44,18 @@ // CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR]], align 8 // CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) +// CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK4: user_code.entry: -// CHECK4-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4:[0-9]+]]) +// CHECK4-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP6]], ptr [[ARGC_CASTED]], align 4 // CHECK4-NEXT: [[TMP7:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8 // CHECK4-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK4-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP7]], ptr [[TMP3]]) #[[ATTR5:[0-9]+]] -// CHECK4-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK4-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK4-NEXT: ret void // CHECK4: worker.exit: // CHECK4-NEXT: ret void @@ -114,97 +114,97 @@ // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B4]], ptr align 4 [[TMP0]], i64 40, i1 false) // CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK4-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK4-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK4-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK4-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] // CHECK4-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK4-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK4-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] // CHECK4-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK4-NEXT: [[TMP21:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK4-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP18]] to ptr +// CHECK4-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8 +// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 // CHECK4-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP20]] to ptr // CHECK4-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 -// CHECK4-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK4-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP22]] to ptr -// CHECK4-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8 -// CHECK4-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK4-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP27]], align 8 -// CHECK4-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK4-NEXT: store ptr [[TMP2]], ptr [[TMP29]], align 8 -// CHECK4-NEXT: [[TMP31:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK4-NEXT: store ptr [[B4]], ptr [[TMP31]], align 8 -// CHECK4-NEXT: [[TMP33:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 5 -// CHECK4-NEXT: store ptr [[C1]], ptr [[TMP33]], align 8 -// CHECK4-NEXT: [[TMP35:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 6 -// CHECK4-NEXT: store ptr [[TMP3]], ptr [[TMP35]], align 8 -// CHECK4-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP37]], 0 -// CHECK4-NEXT: [[TMP38:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK4-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 -// CHECK4-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP40]], i32 [[TMP38]], i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 7) +// CHECK4-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK4-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP25]], align 8 +// CHECK4-NEXT: [[TMP26:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 +// CHECK4-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 +// CHECK4-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 +// CHECK4-NEXT: store ptr [[B4]], ptr [[TMP27]], align 8 +// CHECK4-NEXT: [[TMP28:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 5 +// CHECK4-NEXT: store ptr [[C1]], ptr [[TMP28]], align 8 +// CHECK4-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 6 +// CHECK4-NEXT: store ptr [[TMP3]], ptr [[TMP29]], align 8 +// CHECK4-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK4-NEXT: [[TMP31:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK4-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK4-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP33]], i32 [[TMP31]], i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 7) // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] +// CHECK4-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK4-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] +// CHECK4-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK4-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP46]], [[TMP47]] +// CHECK4-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] // CHECK4-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK4-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP48]], [[TMP49]] +// CHECK4-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK4-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] // CHECK4-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] // CHECK4: cond.true12: -// CHECK4-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK4-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK4-NEXT: br label [[COND_END14:%.*]] // CHECK4: cond.false13: -// CHECK4-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: br label [[COND_END14]] // CHECK4: cond.end14: -// CHECK4-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP50]], [[COND_TRUE12]] ], [ [[TMP51]], [[COND_FALSE13]] ] +// CHECK4-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE12]] ], [ [[TMP43]], [[COND_FALSE13]] ] // CHECK4-NEXT: store i32 [[COND15]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP52]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP44]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[TMP53:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP54:%.*]] = load i32, ptr [[TMP53]], align 4 -// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP54]]) -// CHECK4-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP56:%.*]] = icmp ne i32 [[TMP55]], 0 -// CHECK4-NEXT: br i1 [[TMP56]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK4-NEXT: [[TMP45:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4 +// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP46]]) +// CHECK4-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0 +// CHECK4-NEXT: br i1 [[TMP48]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK4: .omp.lastprivate.then: // CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[C1]], i64 40, i1 false) // CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] @@ -277,38 +277,38 @@ // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B4]], ptr align 4 [[TMP2]], i64 40, i1 false) -// CHECK4-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP14]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[CONV7:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK4-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: [[CMP8:%.*]] = icmp ule i64 [[CONV7]], [[TMP17]] +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[CONV7:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK4-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK4-NEXT: [[CMP8:%.*]] = icmp ule i64 [[CONV7]], [[TMP15]] // CHECK4-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], ptr [[I6]], align 4 // CHECK4-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I6]]) #[[ATTR8:[0-9]+]] // CHECK4-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP1]]) #[[ATTR8]] // CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[CALL]], [[CALL9]] -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[I6]], align 4 -// CHECK4-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[I6]], align 4 +// CHECK4-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B4]], i64 0, i64 [[IDXPROM]] // CHECK4-NEXT: [[CALL11:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX]]) #[[ATTR8]] // CHECK4-NEXT: [[ADD12:%.*]] = add nsw i32 [[ADD10]], [[CALL11]] -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[I6]], align 4 -// CHECK4-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[I6]], align 4 +// CHECK4-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP18]] to i64 // CHECK4-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], ptr [[C5]], i64 0, i64 [[IDXPROM13]] // CHECK4-NEXT: [[CALL15:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX14]]) #[[ATTR8]] // CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD12]], [[CALL15]] -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[I6]], align 4 -// CHECK4-NEXT: [[IDXPROM17:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[I6]], align 4 +// CHECK4-NEXT: [[IDXPROM17:%.*]] = sext i32 [[TMP19]] to i64 // CHECK4-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM17]] // CHECK4-NEXT: [[CALL19:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX18]]) #[[ATTR8]] // CHECK4-NEXT: [[ADD20:%.*]] = add nsw i32 [[ADD16]], [[CALL19]] @@ -317,20 +317,20 @@ // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK4-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]]) -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK4-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK4-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP23]]) +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK4-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK4: .omp.lastprivate.then: // CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP3]], ptr align 4 [[C5]], i64 40, i1 false) // CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] @@ -360,18 +360,18 @@ // CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR]], align 4 // CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) +// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK5-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 // CHECK5-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK5: user_code.entry: -// CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4:[0-9]+]]) +// CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP6]], ptr [[ARGC_CASTED]], align 4 // CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK5-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], i32 [[TMP7]], ptr [[TMP3]]) #[[ATTR5:[0-9]+]] -// CHECK5-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK5-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK5-NEXT: ret void // CHECK5: worker.exit: // CHECK5-NEXT: ret void @@ -430,95 +430,95 @@ // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B4]], ptr align 4 [[TMP0]], i32 40, i1 false) // CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK5-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK5-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK5-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK5-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] // CHECK5-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK5-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK5-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] // CHECK5-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP22:%.*]] = inttoptr i32 [[TMP19]] to ptr +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP20:%.*]] = inttoptr i32 [[TMP17]] to ptr +// CHECK5-NEXT: store ptr [[TMP20]], ptr [[TMP19]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP22:%.*]] = inttoptr i32 [[TMP18]] to ptr // CHECK5-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 4 -// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP20]] to ptr -// CHECK5-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 4 -// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK5-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP25]], align 4 -// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP27]], align 4 -// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 -// CHECK5-NEXT: store ptr [[B4]], ptr [[TMP29]], align 4 -// CHECK5-NEXT: [[TMP31:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 5 -// CHECK5-NEXT: store ptr [[C1]], ptr [[TMP31]], align 4 -// CHECK5-NEXT: [[TMP33:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 6 -// CHECK5-NEXT: store ptr [[TMP3]], ptr [[TMP33]], align 4 -// CHECK5-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK5-NEXT: [[TMP36:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK5-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 -// CHECK5-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP38]], i32 [[TMP36]], i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 7) +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP23]], align 4 +// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4 +// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 +// CHECK5-NEXT: store ptr [[B4]], ptr [[TMP25]], align 4 +// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 5 +// CHECK5-NEXT: store ptr [[C1]], ptr [[TMP26]], align 4 +// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 6 +// CHECK5-NEXT: store ptr [[TMP3]], ptr [[TMP27]], align 4 +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK5-NEXT: [[TMP29:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK5-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK5-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP31]], i32 [[TMP29]], i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 7) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] +// CHECK5-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK5-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] +// CHECK5-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK5-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] +// CHECK5-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK5-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK5-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP46]], [[TMP47]] +// CHECK5-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK5-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] // CHECK5-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] // CHECK5: cond.true12: -// CHECK5-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK5-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK5-NEXT: br label [[COND_END14:%.*]] // CHECK5: cond.false13: -// CHECK5-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END14]] // CHECK5: cond.end14: -// CHECK5-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP48]], [[COND_TRUE12]] ], [ [[TMP49]], [[COND_FALSE13]] ] +// CHECK5-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE12]] ], [ [[TMP41]], [[COND_FALSE13]] ] // CHECK5-NEXT: store i32 [[COND15]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP50]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP42]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 -// CHECK5-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP52]]) -// CHECK5-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP54:%.*]] = icmp ne i32 [[TMP53]], 0 -// CHECK5-NEXT: br i1 [[TMP54]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK5-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP44]]) +// CHECK5-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK5-NEXT: br i1 [[TMP46]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP1]], ptr align 4 [[C1]], i32 40, i1 false) // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] @@ -589,35 +589,35 @@ // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B3]], ptr align 4 [[TMP2]], i32 40, i1 false) -// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP14]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: [[CMP6:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK5-NEXT: [[CMP6:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 // CHECK5-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I5]]) #[[ATTR8:[0-9]+]] // CHECK5-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP1]]) #[[ATTR8]] // CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]] -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B3]], i32 0, i32 [[TMP19]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I5]], align 4 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B3]], i32 0, i32 [[TMP17]] // CHECK5-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX]]) #[[ATTR8]] // CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]] -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK5-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[C4]], i32 0, i32 [[TMP20]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[I5]], align 4 +// CHECK5-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[C4]], i32 0, i32 [[TMP18]] // CHECK5-NEXT: [[CALL12:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX11]]) #[[ATTR8]] // CHECK5-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD10]], [[CALL12]] -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK5-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP4]], i32 0, i32 [[TMP21]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[I5]], align 4 +// CHECK5-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP4]], i32 0, i32 [[TMP19]] // CHECK5-NEXT: [[CALL15:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX14]]) #[[ATTR8]] // CHECK5-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD13]], [[CALL15]] // CHECK5-NEXT: store i32 [[ADD16]], ptr [[TMP1]], align 4 @@ -625,20 +625,20 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK5-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK5-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]]) -// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK5-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK5-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP23]]) +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK5-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP3]], ptr align 4 [[C4]], i32 40, i1 false) // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] Index: clang/test/OpenMP/nvptx_lambda_capturing.cpp =================================================================== --- clang/test/OpenMP/nvptx_lambda_capturing.cpp +++ clang/test/OpenMP/nvptx_lambda_capturing.cpp @@ -122,220 +122,220 @@ // CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP29]], align 8 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store i64 [[TMP11]], ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP11]], ptr [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP35]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP36]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP38]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP40]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP13]], ptr [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP13]], ptr [[TMP43]], align 8 -// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP13]], ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP13]], ptr [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP40]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP41]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK1-NEXT: store ptr null, ptr [[TMP42]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP43]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP44]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 // CHECK1-NEXT: store ptr null, ptr [[TMP45]], align 8 -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 -// CHECK1-NEXT: store ptr null, ptr [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr [[A]], ptr [[TMP51]], align 8 -// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr [[A]], ptr [[TMP53]], align 8 -// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP55]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP56]], align 8 -// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP58]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP46]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP47]], align 8 +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5 +// CHECK1-NEXT: store ptr null, ptr [[TMP48]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP49]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr [[TMP18]], ptr [[TMP50]], align 8 +// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP51]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr [[TMP19]], ptr [[TMP52]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP53]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP54]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8 +// CHECK1-NEXT: store ptr [[TMP22]], ptr [[TMP55]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 8 +// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP56]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 8 +// CHECK1-NEXT: store ptr null, ptr [[TMP57]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 9 +// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP58]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 9 +// CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP59]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 9 // CHECK1-NEXT: store ptr null, ptr [[TMP60]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP61]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr [[TMP18]], ptr [[TMP63]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP65]], align 8 -// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr [[TMP19]], ptr [[TMP66]], align 8 -// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP68]], align 8 -// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP70]], align 8 -// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8 -// CHECK1-NEXT: store ptr [[TMP22]], ptr [[TMP71]], align 8 -// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 8 -// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP73]], align 8 -// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 8 -// CHECK1-NEXT: store ptr null, ptr [[TMP75]], align 8 -// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 9 -// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP76]], align 8 -// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 9 -// CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP78]], align 8 -// CHECK1-NEXT: [[TMP80:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 9 -// CHECK1-NEXT: store ptr null, ptr [[TMP80]], align 8 -// CHECK1-NEXT: [[TMP81:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 10 -// CHECK1-NEXT: store ptr [[TMP28]], ptr [[TMP81]], align 8 -// CHECK1-NEXT: [[TMP83:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 10 -// CHECK1-NEXT: store ptr [[TMP30]], ptr [[TMP83]], align 8 -// CHECK1-NEXT: [[TMP85:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 10 -// CHECK1-NEXT: store ptr null, ptr [[TMP85]], align 8 -// CHECK1-NEXT: [[TMP86:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP87:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 10 +// CHECK1-NEXT: store ptr [[TMP28]], ptr [[TMP61]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 10 +// CHECK1-NEXT: store ptr [[TMP30]], ptr [[TMP62]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 10 +// CHECK1-NEXT: store ptr null, ptr [[TMP63]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP88:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP88]], align 4 -// CHECK1-NEXT: [[TMP89:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 11, ptr [[TMP89]], align 4 -// CHECK1-NEXT: [[TMP90:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP86]], ptr [[TMP90]], align 8 -// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP87]], ptr [[TMP91]], align 8 -// CHECK1-NEXT: [[TMP92:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP92]], align 8 -// CHECK1-NEXT: [[TMP93:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP93]], align 8 -// CHECK1-NEXT: [[TMP94:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP94]], align 8 -// CHECK1-NEXT: [[TMP95:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP95]], align 8 -// CHECK1-NEXT: [[TMP96:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP96]], align 8 -// CHECK1-NEXT: [[TMP97:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP98:%.*]] = icmp ne i32 [[TMP97]], 0 -// CHECK1-NEXT: br i1 [[TMP98]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP66]], align 4 +// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 11, ptr [[TMP67]], align 4 +// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP64]], ptr [[TMP68]], align 8 +// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP65]], ptr [[TMP69]], align 8 +// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP70]], align 8 +// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP71]], align 8 +// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP72]], align 8 +// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP73]], align 8 +// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP74]], align 8 +// CHECK1-NEXT: [[TMP75:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP76:%.*]] = icmp ne i32 [[TMP75]], 0 +// CHECK1-NEXT: br i1 [[TMP76]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41(i64 [[TMP11]], ptr [[TMP12]], ptr [[TMP13]], ptr [[TMP14]], ptr [[A]], ptr [[TMP15]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: -// CHECK1-NEXT: [[TMP99:%.*]] = load ptr, ptr [[B]], align 8 -// CHECK1-NEXT: store ptr [[TMP99]], ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP100:%.*]] = load ptr, ptr [[C]], align 8 -// CHECK1-NEXT: store ptr [[TMP100]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP101:%.*]] = load ptr, ptr [[L]], align 8 -// CHECK1-NEXT: store ptr [[TMP101]], ptr [[_TMP6]], align 8 -// CHECK1-NEXT: [[TMP102:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP103:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP104:%.*]] = load ptr, ptr [[D]], align 8 -// CHECK1-NEXT: [[TMP105:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK1-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP105]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP105]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP108:%.*]] = load ptr, ptr [[TMP107]], align 8 -// CHECK1-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP105]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP105]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP111:%.*]] = load ptr, ptr [[TMP110]], align 8 -// CHECK1-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP105]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP105]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP114:%.*]] = load ptr, ptr [[TMP113]], align 8 -// CHECK1-NEXT: [[TMP115:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP105]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP116:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP105]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP117:%.*]] = load ptr, ptr [[TMP116]], align 8 -// CHECK1-NEXT: [[TMP118:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP105]], i32 0, i32 4 -// CHECK1-NEXT: [[TMP119:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP105]], i32 0, i32 4 -// CHECK1-NEXT: [[TMP120:%.*]] = load ptr, ptr [[TMP119]], align 8 -// CHECK1-NEXT: [[TMP121:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP121]], align 8 -// CHECK1-NEXT: [[TMP123:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP123]], align 8 -// CHECK1-NEXT: [[TMP125:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP77:%.*]] = load ptr, ptr [[B]], align 8 +// CHECK1-NEXT: store ptr [[TMP77]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP78:%.*]] = load ptr, ptr [[C]], align 8 +// CHECK1-NEXT: store ptr [[TMP78]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP79:%.*]] = load ptr, ptr [[L]], align 8 +// CHECK1-NEXT: store ptr [[TMP79]], ptr [[_TMP6]], align 8 +// CHECK1-NEXT: [[TMP80:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP81:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP82:%.*]] = load ptr, ptr [[D]], align 8 +// CHECK1-NEXT: [[TMP83:%.*]] = load ptr, ptr [[_TMP6]], align 8 +// CHECK1-NEXT: [[TMP84:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP83]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP85:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP83]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP86:%.*]] = load ptr, ptr [[TMP85]], align 8 +// CHECK1-NEXT: [[TMP87:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP83]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP88:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP83]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP89:%.*]] = load ptr, ptr [[TMP88]], align 8 +// CHECK1-NEXT: [[TMP90:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP83]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP83]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP92:%.*]] = load ptr, ptr [[TMP91]], align 8 +// CHECK1-NEXT: [[TMP93:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP83]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP94:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP83]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP95:%.*]] = load ptr, ptr [[TMP94]], align 8 +// CHECK1-NEXT: [[TMP96:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP83]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP97:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP83]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP98:%.*]] = load ptr, ptr [[TMP97]], align 8 +// CHECK1-NEXT: [[TMP99:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP99]], align 8 +// CHECK1-NEXT: [[TMP100:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP100]], align 8 +// CHECK1-NEXT: [[TMP101:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP101]], align 8 +// CHECK1-NEXT: [[TMP102:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP80]], ptr [[TMP102]], align 8 +// CHECK1-NEXT: [[TMP103:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP80]], ptr [[TMP103]], align 8 +// CHECK1-NEXT: [[TMP104:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP104]], align 8 +// CHECK1-NEXT: [[TMP105:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP81]], ptr [[TMP105]], align 8 +// CHECK1-NEXT: [[TMP106:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP81]], ptr [[TMP106]], align 8 +// CHECK1-NEXT: [[TMP107:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP107]], align 8 +// CHECK1-NEXT: [[TMP108:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP82]], ptr [[TMP108]], align 8 +// CHECK1-NEXT: [[TMP109:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP82]], ptr [[TMP109]], align 8 +// CHECK1-NEXT: [[TMP110:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 3 +// CHECK1-NEXT: store ptr null, ptr [[TMP110]], align 8 +// CHECK1-NEXT: [[TMP111:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP111]], align 8 +// CHECK1-NEXT: [[TMP112:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP112]], align 8 +// CHECK1-NEXT: [[TMP113:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP113]], align 8 +// CHECK1-NEXT: [[TMP114:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP83]], ptr [[TMP114]], align 8 +// CHECK1-NEXT: [[TMP115:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP83]], ptr [[TMP115]], align 8 +// CHECK1-NEXT: [[TMP116:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 5 +// CHECK1-NEXT: store ptr null, ptr [[TMP116]], align 8 +// CHECK1-NEXT: [[TMP117:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 6 +// CHECK1-NEXT: store ptr [[TMP84]], ptr [[TMP117]], align 8 +// CHECK1-NEXT: [[TMP118:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 6 +// CHECK1-NEXT: store ptr [[TMP86]], ptr [[TMP118]], align 8 +// CHECK1-NEXT: [[TMP119:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP119]], align 8 +// CHECK1-NEXT: [[TMP120:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 7 +// CHECK1-NEXT: store ptr [[TMP87]], ptr [[TMP120]], align 8 +// CHECK1-NEXT: [[TMP121:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 7 +// CHECK1-NEXT: store ptr [[TMP89]], ptr [[TMP121]], align 8 +// CHECK1-NEXT: [[TMP122:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP122]], align 8 +// CHECK1-NEXT: [[TMP123:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 8 +// CHECK1-NEXT: store ptr [[TMP90]], ptr [[TMP123]], align 8 +// CHECK1-NEXT: [[TMP124:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 8 +// CHECK1-NEXT: store ptr [[TMP92]], ptr [[TMP124]], align 8 +// CHECK1-NEXT: [[TMP125:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 8 // CHECK1-NEXT: store ptr null, ptr [[TMP125]], align 8 -// CHECK1-NEXT: [[TMP126:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP102]], ptr [[TMP126]], align 8 -// CHECK1-NEXT: [[TMP128:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP102]], ptr [[TMP128]], align 8 -// CHECK1-NEXT: [[TMP130:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP130]], align 8 -// CHECK1-NEXT: [[TMP131:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP103]], ptr [[TMP131]], align 8 -// CHECK1-NEXT: [[TMP133:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP103]], ptr [[TMP133]], align 8 -// CHECK1-NEXT: [[TMP135:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 2 -// CHECK1-NEXT: store ptr null, ptr [[TMP135]], align 8 -// CHECK1-NEXT: [[TMP136:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP104]], ptr [[TMP136]], align 8 -// CHECK1-NEXT: [[TMP138:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP104]], ptr [[TMP138]], align 8 -// CHECK1-NEXT: [[TMP140:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 3 -// CHECK1-NEXT: store ptr null, ptr [[TMP140]], align 8 -// CHECK1-NEXT: [[TMP141:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 4 -// CHECK1-NEXT: store ptr [[A]], ptr [[TMP141]], align 8 -// CHECK1-NEXT: [[TMP143:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 4 -// CHECK1-NEXT: store ptr [[A]], ptr [[TMP143]], align 8 -// CHECK1-NEXT: [[TMP145:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP145]], align 8 -// CHECK1-NEXT: [[TMP146:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 5 -// CHECK1-NEXT: store ptr [[TMP105]], ptr [[TMP146]], align 8 -// CHECK1-NEXT: [[TMP148:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 5 -// CHECK1-NEXT: store ptr [[TMP105]], ptr [[TMP148]], align 8 -// CHECK1-NEXT: [[TMP150:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 5 -// CHECK1-NEXT: store ptr null, ptr [[TMP150]], align 8 -// CHECK1-NEXT: [[TMP151:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 6 -// CHECK1-NEXT: store ptr [[TMP106]], ptr [[TMP151]], align 8 -// CHECK1-NEXT: [[TMP153:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 6 -// CHECK1-NEXT: store ptr [[TMP108]], ptr [[TMP153]], align 8 -// CHECK1-NEXT: [[TMP155:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP155]], align 8 -// CHECK1-NEXT: [[TMP156:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 7 -// CHECK1-NEXT: store ptr [[TMP109]], ptr [[TMP156]], align 8 -// CHECK1-NEXT: [[TMP158:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 7 -// CHECK1-NEXT: store ptr [[TMP111]], ptr [[TMP158]], align 8 -// CHECK1-NEXT: [[TMP160:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP160]], align 8 -// CHECK1-NEXT: [[TMP161:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 8 -// CHECK1-NEXT: store ptr [[TMP112]], ptr [[TMP161]], align 8 -// CHECK1-NEXT: [[TMP163:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 8 -// CHECK1-NEXT: store ptr [[TMP114]], ptr [[TMP163]], align 8 -// CHECK1-NEXT: [[TMP165:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 8 -// CHECK1-NEXT: store ptr null, ptr [[TMP165]], align 8 -// CHECK1-NEXT: [[TMP166:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 9 -// CHECK1-NEXT: store ptr [[TMP115]], ptr [[TMP166]], align 8 -// CHECK1-NEXT: [[TMP168:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 9 -// CHECK1-NEXT: store ptr [[TMP117]], ptr [[TMP168]], align 8 -// CHECK1-NEXT: [[TMP170:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 9 -// CHECK1-NEXT: store ptr null, ptr [[TMP170]], align 8 -// CHECK1-NEXT: [[TMP171:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 10 -// CHECK1-NEXT: store ptr [[TMP118]], ptr [[TMP171]], align 8 -// CHECK1-NEXT: [[TMP173:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 10 -// CHECK1-NEXT: store ptr [[TMP120]], ptr [[TMP173]], align 8 -// CHECK1-NEXT: [[TMP175:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 10 -// CHECK1-NEXT: store ptr null, ptr [[TMP175]], align 8 -// CHECK1-NEXT: [[TMP176:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP177:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP126:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 9 +// CHECK1-NEXT: store ptr [[TMP93]], ptr [[TMP126]], align 8 +// CHECK1-NEXT: [[TMP127:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 9 +// CHECK1-NEXT: store ptr [[TMP95]], ptr [[TMP127]], align 8 +// CHECK1-NEXT: [[TMP128:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 9 +// CHECK1-NEXT: store ptr null, ptr [[TMP128]], align 8 +// CHECK1-NEXT: [[TMP129:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 10 +// CHECK1-NEXT: store ptr [[TMP96]], ptr [[TMP129]], align 8 +// CHECK1-NEXT: [[TMP130:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 10 +// CHECK1-NEXT: store ptr [[TMP98]], ptr [[TMP130]], align 8 +// CHECK1-NEXT: [[TMP131:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 10 +// CHECK1-NEXT: store ptr null, ptr [[TMP131]], align 8 +// CHECK1-NEXT: [[TMP132:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP133:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS10:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK1-NEXT: [[TMP178:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP178]], align 4 -// CHECK1-NEXT: [[TMP179:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1 -// CHECK1-NEXT: store i32 11, ptr [[TMP179]], align 4 -// CHECK1-NEXT: [[TMP180:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP176]], ptr [[TMP180]], align 8 -// CHECK1-NEXT: [[TMP181:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP177]], ptr [[TMP181]], align 8 -// CHECK1-NEXT: [[TMP182:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.1, ptr [[TMP182]], align 8 -// CHECK1-NEXT: [[TMP183:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP183]], align 8 -// CHECK1-NEXT: [[TMP184:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP184]], align 8 -// CHECK1-NEXT: [[TMP185:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP185]], align 8 -// CHECK1-NEXT: [[TMP186:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP186]], align 8 -// CHECK1-NEXT: [[TMP187:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l43.region_id, ptr [[KERNEL_ARGS10]]) -// CHECK1-NEXT: [[TMP188:%.*]] = icmp ne i32 [[TMP187]], 0 -// CHECK1-NEXT: br i1 [[TMP188]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]] +// CHECK1-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP134]], align 4 +// CHECK1-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1 +// CHECK1-NEXT: store i32 11, ptr [[TMP135]], align 4 +// CHECK1-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP132]], ptr [[TMP136]], align 8 +// CHECK1-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP133]], ptr [[TMP137]], align 8 +// CHECK1-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.1, ptr [[TMP138]], align 8 +// CHECK1-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP139]], align 8 +// CHECK1-NEXT: [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP140]], align 8 +// CHECK1-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP141]], align 8 +// CHECK1-NEXT: [[TMP142:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP142]], align 8 +// CHECK1-NEXT: [[TMP143:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l43.region_id, ptr [[KERNEL_ARGS10]]) +// CHECK1-NEXT: [[TMP144:%.*]] = icmp ne i32 [[TMP143]], 0 +// CHECK1-NEXT: br i1 [[TMP144]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]] // CHECK1: omp_offload.failed11: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l43(ptr [[ARGC_ADDR]], ptr [[TMP102]], ptr [[TMP103]], ptr [[TMP104]], ptr [[A]], ptr [[TMP105]]) #[[ATTR4]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l43(ptr [[ARGC_ADDR]], ptr [[TMP80]], ptr [[TMP81]], ptr [[TMP82]], ptr [[A]], ptr [[TMP83]]) #[[ATTR4]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT12]] // CHECK1: omp_offload.cont12: -// CHECK1-NEXT: [[TMP189:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK1-NEXT: [[TMP145:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_ZN1S3fooEv(ptr noundef nonnull align 4 dereferenceable(4) @s) -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP189]], [[CALL]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP145]], [[CALL]] // CHECK1-NEXT: ret i32 [[ADD]] // // @@ -373,16 +373,16 @@ // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP2]], align 8 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L3]], ptr align 8 [[TMP4]], i64 40, i1 false) // CHECK1-NEXT: store ptr [[L3]], ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[B5]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[B5]], align 4 // CHECK1-NEXT: store ptr [[B5]], ptr [[_TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[C7]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[C7]], align 4 // CHECK1-NEXT: store ptr [[C7]], ptr [[_TMP8]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP11]]) +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP9]]) // CHECK1-NEXT: ret void // // @@ -461,20 +461,20 @@ // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L3]], ptr align 8 [[TMP5]], i64 40, i1 false) // CHECK1-NEXT: store ptr [[L3]], ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARGC5]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[B6]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[ARGC5]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[B6]], align 4 // CHECK1-NEXT: store ptr [[B6]], ptr [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[C8]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[C8]], align 4 // CHECK1-NEXT: store ptr [[C8]], ptr [[_TMP9]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[A10]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP14]]) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[A10]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP12]]) // CHECK1-NEXT: ret void // // @@ -504,106 +504,106 @@ // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 -// CHECK1-NEXT: store ptr null, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP22]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 3, ptr [[TMP23]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP20]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.3, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l27.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK1-NEXT: br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP16]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 3, ptr [[TMP17]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.3, ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l27.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-NEXT: br i1 [[TMP26]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l27(ptr [[THIS1]], ptr [[TMP2]]) #[[ATTR4]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[L]], align 8 -// CHECK1-NEXT: store ptr [[TMP33]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP34]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP34]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP37]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP39]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS5]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP34]], ptr [[TMP42]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP34]], ptr [[TMP44]], align 8 -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS5]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP35]], ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP36]], ptr [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS5]], i64 0, i64 2 -// CHECK1-NEXT: store ptr null, ptr [[TMP51]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[L]], align 8 +// CHECK1-NEXT: store ptr [[TMP27]], ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP28]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP28]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS5]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP28]], ptr [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP28]], ptr [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS5]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP29]], ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP30]], ptr [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS5]], i64 0, i64 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP54]], align 4 -// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 -// CHECK1-NEXT: store i32 3, ptr [[TMP55]], align 4 -// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP52]], ptr [[TMP56]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP53]], ptr [[TMP57]], align 8 -// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.6, ptr [[TMP58]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.7, ptr [[TMP59]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP60]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP61]], align 8 -// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP62]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l29.region_id, ptr [[KERNEL_ARGS6]]) -// CHECK1-NEXT: [[TMP64:%.*]] = icmp ne i32 [[TMP63]], 0 -// CHECK1-NEXT: br i1 [[TMP64]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]] +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP42]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 +// CHECK1-NEXT: store i32 3, ptr [[TMP43]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP40]], ptr [[TMP44]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP41]], ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.6, ptr [[TMP46]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.7, ptr [[TMP47]], align 8 +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP48]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP49]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP50]], align 8 +// CHECK1-NEXT: [[TMP51:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l29.region_id, ptr [[KERNEL_ARGS6]]) +// CHECK1-NEXT: [[TMP52:%.*]] = icmp ne i32 [[TMP51]], 0 +// CHECK1-NEXT: br i1 [[TMP52]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]] // CHECK1: omp_offload.failed7: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l29(ptr [[THIS1]], ptr [[TMP34]]) #[[ATTR4]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l29(ptr [[THIS1]], ptr [[TMP28]]) #[[ATTR4]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT8]] // CHECK1: omp_offload.cont8: // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[THIS1]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP65:%.*]] = load i32, ptr [[A]], align 4 -// CHECK1-NEXT: [[TMP66:%.*]] = load ptr, ptr [[L]], align 8 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z3fooIZN1S3fooEvEUlvE_EiRKT_(ptr noundef nonnull align 8 dereferenceable(8) [[TMP66]]) -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP65]], [[CALL]] +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[L]], align 8 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z3fooIZN1S3fooEvEUlvE_EiRKT_(ptr noundef nonnull align 8 dereferenceable(8) [[TMP54]]) +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP53]], [[CALL]] // CHECK1-NEXT: ret i32 [[ADD]] // // @@ -623,8 +623,8 @@ // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L1]], ptr align 8 [[TMP2]], i64 8, i1 false) // CHECK1-NEXT: store ptr [[L1]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP5]]) +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP3]]) // CHECK1-NEXT: ret void // // @@ -677,8 +677,8 @@ // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L1]], ptr align 8 [[TMP2]], i64 8, i1 false) // CHECK1-NEXT: store ptr [[L1]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP5]]) +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP3]]) // CHECK1-NEXT: ret void // // @@ -698,40 +698,40 @@ // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP16]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 2, ptr [[TMP17]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.9, ptr [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIZN1S3fooEvEUlvE_EiRKT__l18.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TMP26]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP12]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 2, ptr [[TMP13]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.9, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIZN1S3fooEvEUlvE_EiRKT__l18.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIZN1S3fooEvEUlvE_EiRKT__l18(ptr [[TMP1]]) #[[ATTR4]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -769,8 +769,8 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[T1]], ptr align 8 [[TMP1]], i64 8, i1 false) // CHECK1-NEXT: store ptr [[T1]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP4]]) +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP2]]) // CHECK1-NEXT: ret void // // @@ -794,19 +794,19 @@ // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[L_ADDR]], align 8 // CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L1]], ptr align 8 [[TMP3]], i64 8, i1 false) // CHECK2-NEXT: store ptr [[L1]], ptr [[_TMP2]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 // CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP6]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP8]]) #[[ATTR7:[0-9]+]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP6]]) #[[ATTR7:[0-9]+]] +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -837,18 +837,18 @@ // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[L_ADDR]], align 8 // CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) +// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP7]], align 8 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP6]], align 8 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -874,11 +874,11 @@ // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L1]], ptr align 8 [[TMP2]], i64 8, i1 false) // CHECK2-NEXT: store ptr [[L1]], ptr [[_TMP2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 // CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP5]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP7]]) #[[ATTR7]] +// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP5]]) #[[ATTR7]] // CHECK2-NEXT: ret void // // @@ -913,37 +913,37 @@ // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 // CHECK2-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8 // CHECK2-NEXT: store ptr [[TMP3]], ptr [[_TMP2]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) +// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8 // CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L3]], ptr align 8 [[TMP5]], i64 40, i1 false) // CHECK2-NEXT: store ptr [[L3]], ptr [[_TMP4]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK2-NEXT: store i32 [[TMP9]], ptr [[B5]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[B5]], align 4 // CHECK2-NEXT: store ptr [[B5]], ptr [[_TMP6]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], ptr [[C7]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[C7]], align 4 // CHECK2-NEXT: store ptr [[C7]], ptr [[_TMP8]], align 8 -// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP12]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP13]], align 8 -// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP12]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP6]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP10]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP11]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP10]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP6]], align 8 +// CHECK2-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP10]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP8]], align 8 // CHECK2-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8 -// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP12]], i32 0, i32 2 -// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP8]], align 8 -// CHECK2-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 -// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP12]], i32 0, i32 3 -// CHECK2-NEXT: store ptr [[D_ADDR]], ptr [[TMP18]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP12]], i32 0, i32 4 -// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP19]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK2-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP20]]) #[[ATTR7]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP10]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[D_ADDR]], ptr [[TMP16]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP10]], i32 0, i32 4 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP17]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK2-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP18]]) #[[ATTR7]] +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -976,29 +976,29 @@ // CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 // CHECK2-NEXT: store ptr [[TMP2]], ptr [[_TMP1]], align 8 // CHECK2-NEXT: store ptr [[TMP4]], ptr [[_TMP2]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true) +// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP5]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +// CHECK2-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8 // CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8 // CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP13]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK2-NEXT: store ptr [[TMP8]], ptr [[TMP15]], align 8 -// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK2-NEXT: store ptr [[TMP9]], ptr [[TMP17]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK2-NEXT: store ptr [[TMP3]], ptr [[TMP19]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 5 -// CHECK2-NEXT: store ptr [[TMP10]], ptr [[TMP21]], align 8 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 6) -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP12]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK2-NEXT: store ptr [[TMP8]], ptr [[TMP13]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 +// CHECK2-NEXT: store ptr [[TMP9]], ptr [[TMP14]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 +// CHECK2-NEXT: store ptr [[TMP3]], ptr [[TMP15]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 5 +// CHECK2-NEXT: store ptr [[TMP10]], ptr [[TMP16]], align 8 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 6) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -1045,33 +1045,33 @@ // CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8 // CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L3]], ptr align 8 [[TMP5]], i64 40, i1 false) // CHECK2-NEXT: store ptr [[L3]], ptr [[_TMP4]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK2-NEXT: store i32 [[TMP8]], ptr [[ARGC5]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK2-NEXT: store i32 [[TMP10]], ptr [[B6]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[ARGC5]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK2-NEXT: store i32 [[TMP8]], ptr [[B6]], align 4 // CHECK2-NEXT: store ptr [[B6]], ptr [[_TMP7]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK2-NEXT: store i32 [[TMP12]], ptr [[C8]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[C8]], align 4 // CHECK2-NEXT: store ptr [[C8]], ptr [[_TMP9]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK2-NEXT: store i32 [[TMP13]], ptr [[A10]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP14]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[ARGC5]], ptr [[TMP15]], align 8 -// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP14]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[A10]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP12]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[ARGC5]], ptr [[TMP13]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP12]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP7]], align 8 +// CHECK2-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP12]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP9]], align 8 // CHECK2-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 -// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP14]], i32 0, i32 2 -// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP9]], align 8 -// CHECK2-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP14]], i32 0, i32 3 -// CHECK2-NEXT: store ptr [[D_ADDR]], ptr [[TMP20]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP14]], i32 0, i32 4 -// CHECK2-NEXT: store ptr [[A10]], ptr [[TMP21]], align 8 -// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK2-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP22]]) #[[ATTR7]] +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP12]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[D_ADDR]], ptr [[TMP18]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP12]], i32 0, i32 4 +// CHECK2-NEXT: store ptr [[A10]], ptr [[TMP19]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK2-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP20]]) #[[ATTR7]] // CHECK2-NEXT: ret void // // @@ -1084,16 +1084,16 @@ // CHECK2-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 8 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK2-NEXT: store ptr [[TMP3]], ptr [[TMP4]], align 8 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -1116,9 +1116,9 @@ // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[T1]], ptr align 8 [[TMP1]], i64 8, i1 false) // CHECK2-NEXT: store ptr [[T1]], ptr [[_TMP2]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP5]]) #[[ATTR7]] +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP3]]) #[[ATTR7]] // CHECK2-NEXT: ret void // // @@ -1153,37 +1153,37 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8 // CHECK3-NEXT: store ptr [[TMP3]], ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) +// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L3]], ptr align 8 [[TMP5]], i64 40, i1 false) // CHECK3-NEXT: store ptr [[L3]], ptr [[_TMP4]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[B5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[B5]], align 4 // CHECK3-NEXT: store ptr [[B5]], ptr [[_TMP6]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[C7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[C7]], align 4 // CHECK3-NEXT: store ptr [[C7]], ptr [[_TMP8]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP12]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP13]], align 8 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP12]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP6]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP10]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP11]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP10]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP6]], align 8 +// CHECK3-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP10]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP8]], align 8 // CHECK3-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP8]], align 8 -// CHECK3-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP12]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[D_ADDR]], ptr [[TMP18]], align 8 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP12]], i32 0, i32 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP19]], align 8 -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK3-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP20]]) #[[ATTR7:[0-9]+]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP10]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[D_ADDR]], ptr [[TMP16]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP10]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP17]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK3-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP18]]) #[[ATTR7:[0-9]+]] +// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -1216,29 +1216,29 @@ // CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[_TMP1]], align 8 // CHECK3-NEXT: store ptr [[TMP4]], ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true) +// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP5]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: -// CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) +// CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8 // CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP13]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK3-NEXT: store ptr [[TMP9]], ptr [[TMP17]], align 8 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP19]], align 8 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 5 -// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP21]], align 8 -// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 6) -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 +// CHECK3-NEXT: store ptr [[TMP9]], ptr [[TMP14]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 5 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP16]], align 8 +// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 6) +// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -1285,33 +1285,33 @@ // CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L3]], ptr align 8 [[TMP5]], i64 40, i1 false) // CHECK3-NEXT: store ptr [[L3]], ptr [[_TMP4]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[ARGC5]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[B6]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[ARGC5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[B6]], align 4 // CHECK3-NEXT: store ptr [[B6]], ptr [[_TMP7]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], ptr [[C8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[C8]], align 4 // CHECK3-NEXT: store ptr [[C8]], ptr [[_TMP9]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[A10]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP14]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[ARGC5]], ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP14]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[A10]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP12]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[ARGC5]], ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP12]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP7]], align 8 +// CHECK3-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP12]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP9]], align 8 // CHECK3-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP14]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP9]], align 8 -// CHECK3-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP14]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[D_ADDR]], ptr [[TMP20]], align 8 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP14]], i32 0, i32 4 -// CHECK3-NEXT: store ptr [[A10]], ptr [[TMP21]], align 8 -// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK3-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP22]]) #[[ATTR7]] +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP12]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[D_ADDR]], ptr [[TMP18]], align 8 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP12]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[A10]], ptr [[TMP19]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK3-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP20]]) #[[ATTR7]] // CHECK3-NEXT: ret void // // @@ -1328,19 +1328,19 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[L_ADDR]], align 8 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) +// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L1]], ptr align 8 [[TMP3]], i64 8, i1 false) // CHECK3-NEXT: store ptr [[L1]], ptr [[_TMP2]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 // CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP6]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP8]]) #[[ATTR7]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP6]]) #[[ATTR7]] +// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -1371,18 +1371,18 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[L_ADDR]], align 8 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true) +// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: -// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP7]], align 8 -// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP6]], align 8 +// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) +// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -1408,11 +1408,11 @@ // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L1]], ptr align 8 [[TMP2]], i64 8, i1 false) // CHECK3-NEXT: store ptr [[L1]], ptr [[_TMP2]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 // CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP7]]) #[[ATTR7]] +// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP5]]) #[[ATTR7]] // CHECK3-NEXT: ret void // // @@ -1425,16 +1425,16 @@ // CHECK3-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 8 // CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP4]], align 8 -// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -1457,8 +1457,8 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[T1]], ptr align 8 [[TMP1]], i64 8, i1 false) // CHECK3-NEXT: store ptr [[T1]], ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP5]]) #[[ATTR7]] +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP3]]) #[[ATTR7]] // CHECK3-NEXT: ret void // Index: clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp =================================================================== --- clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp +++ clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp @@ -29,13 +29,13 @@ // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -56,20 +56,20 @@ // CHECK1-SAME: () #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23 // CHECK1-SAME: () #[[ATTR5:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: call void @_Z3usev() #[[ATTR8]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -105,13 +105,13 @@ // CHECK2-SAME: () #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -132,20 +132,20 @@ // CHECK2-SAME: () #[[ATTR2:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 0) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23 // CHECK2-SAME: () #[[ATTR5:[0-9]+]] { // CHECK2-NEXT: entry: -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: call void @_Z3usev() #[[ATTR8]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void Index: clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp =================================================================== --- clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp +++ clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp @@ -40,7 +40,7 @@ // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -49,7 +49,7 @@ // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -60,11 +60,11 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP1]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1) // CHECK1-NEXT: ret void // // @@ -95,8 +95,8 @@ // CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]]) #[[ATTR4:[0-9]+]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: ret void // // @@ -126,8 +126,8 @@ // CHECK1-NEXT: call void @__atomic_load(i64 noundef 4, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 0) #[[ATTR7]] // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK1-NEXT: store i32 [[ADD]], ptr [[ATOMIC_TEMP1]], align 4 // CHECK1-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 4, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP1]], i32 noundef 0, i32 noundef 0) #[[ATTR7]] // CHECK1-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] @@ -148,8 +148,8 @@ // CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]]) #[[ATTR4]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -160,7 +160,7 @@ // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -169,7 +169,7 @@ // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 1) -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -180,11 +180,11 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[C_ADDR]], ptr [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 1) // CHECK2-NEXT: ret void // // @@ -215,8 +215,8 @@ // CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]]) #[[ATTR4:[0-9]+]] +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR4:[0-9]+]] // CHECK2-NEXT: ret void // // @@ -246,8 +246,8 @@ // CHECK2-NEXT: call void @__atomic_load(i32 noundef 4, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 0) #[[ATTR7]] // CHECK2-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK2: atomic_cont: -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK2-NEXT: store i32 [[ADD]], ptr [[ATOMIC_TEMP1]], align 4 // CHECK2-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i32 noundef 4, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP1]], i32 noundef 0, i32 noundef 0) #[[ATTR7]] // CHECK2-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] @@ -268,7 +268,7 @@ // CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]]) #[[ATTR4]] +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR4]] // CHECK2-NEXT: ret void // Index: clang/test/OpenMP/nvptx_parallel_codegen.cpp =================================================================== --- clang/test/OpenMP/nvptx_parallel_codegen.cpp +++ clang/test/OpenMP/nvptx_parallel_codegen.cpp @@ -81,7 +81,7 @@ // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS2:%.*]] = alloca [0 x ptr], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -89,10 +89,10 @@ // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 0, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS2]], i64 0) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 // CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -192,7 +192,7 @@ // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -201,19 +201,19 @@ // CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1000 // CHECK1-NEXT: [[TMP4:%.*]] = zext i1 [[CMP]] to i32 // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 [[TMP4]], i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 // CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP7]] to i32 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 -// CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK1-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 +// CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -255,7 +255,7 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -266,11 +266,11 @@ // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[A1]], ptr [[TMP3]], align 8 // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr @__omp_outlined__4_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A1]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[A1]], align 4 // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[A1]], i64 4) -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -331,8 +331,8 @@ // CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: call void @__omp_outlined__4(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]]) #[[ATTR3]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void @__omp_outlined__4(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR3]] // CHECK1-NEXT: ret void // // @@ -344,7 +344,7 @@ // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 4 // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS2:%.*]] = alloca [0 x ptr], align 4 // CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -352,10 +352,10 @@ // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 0) // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 0, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i32 0) // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS2]], i32 0) -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 // CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -455,7 +455,7 @@ // CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -464,19 +464,19 @@ // CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1000 // CHECK2-NEXT: [[TMP4:%.*]] = zext i1 [[CMP]] to i32 // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 [[TMP4]], i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 // CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK2-NEXT: [[CONV1:%.*]] = sext i16 [[TMP7]] to i32 -// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 -// CHECK2-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK2-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 +// CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK2-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK2-NEXT: store i32 [[ADD4]], ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -518,7 +518,7 @@ // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 // CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -529,11 +529,11 @@ // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[A1]], ptr [[TMP3]], align 4 // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr @__omp_outlined__4_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 1) -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[A1]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 // CHECK2-NEXT: store i32 [[INC]], ptr [[A1]], align 4 // CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[A1]], i32 4) -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -594,7 +594,7 @@ // CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__4(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]]) #[[ATTR2]] +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: call void @__omp_outlined__4(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR2]] // CHECK2-NEXT: ret void // Index: clang/test/OpenMP/nvptx_parallel_for_codegen.cpp =================================================================== --- clang/test/OpenMP/nvptx_parallel_for_codegen.cpp +++ clang/test/OpenMP/nvptx_parallel_for_codegen.cpp @@ -41,7 +41,7 @@ // CHECK-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK: user_code.entry: @@ -51,15 +51,15 @@ // CHECK-NEXT: store i32 [[TMP3]], ptr [[D]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-NEXT: store ptr [[D]], ptr [[TMP6]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-NEXT: store ptr [[D]], ptr [[TMP5]], align 8 // CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 2) // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 3 -// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 // CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4 // CHECK-NEXT: call void @__kmpc_free_shared(ptr [[D]], i64 4) -// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK-NEXT: ret void // CHECK: worker.exit: // CHECK-NEXT: ret void @@ -168,9 +168,9 @@ // CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 -// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1 -// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]], ptr [[TMP8]]) #[[ATTR3:[0-9]+]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1 +// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]], ptr [[TMP6]]) #[[ATTR3:[0-9]+]] // CHECK-NEXT: ret void // Index: clang/test/OpenMP/nvptx_target_codegen.cpp =================================================================== --- clang/test/OpenMP/nvptx_target_codegen.cpp +++ clang/test/OpenMP/nvptx_target_codegen.cpp @@ -153,17 +153,17 @@ // CHECK1-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR]], align 8 // CHECK1-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[PTR1_ADDR]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -192,11 +192,11 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -207,21 +207,21 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK1-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 // CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 2 -// CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK1-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 2 +// CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -256,7 +256,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -265,42 +265,42 @@ // CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[CONV5:%.*]] = fpext float [[TMP10]] to double -// CHECK1-NEXT: [[ADD6:%.*]] = fadd double [[CONV5]], 1.000000e+00 -// CHECK1-NEXT: [[CONV7:%.*]] = fptrunc double [[ADD6]] to float -// CHECK1-NEXT: store float [[CONV7]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK1-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX8]], align 4 -// CHECK1-NEXT: [[CONV9:%.*]] = fpext float [[TMP11]] to double -// CHECK1-NEXT: [[ADD10:%.*]] = fadd double [[CONV9]], 1.000000e+00 -// CHECK1-NEXT: [[CONV11:%.*]] = fptrunc double [[ADD10]] to float -// CHECK1-NEXT: store float [[CONV11]], ptr [[ARRAYIDX8]], align 4 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX12]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP12:%.*]] = load double, ptr [[ARRAYIDX13]], align 8 -// CHECK1-NEXT: [[ADD14:%.*]] = fadd double [[TMP12]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD14]], ptr [[ARRAYIDX13]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP10]] to double +// CHECK1-NEXT: [[ADD5:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK1-NEXT: [[CONV6:%.*]] = fptrunc double [[ADD5]] to float +// CHECK1-NEXT: store float [[CONV6]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 +// CHECK1-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 +// CHECK1-NEXT: [[CONV8:%.*]] = fpext float [[TMP11]] to double +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[CONV8]], 1.000000e+00 +// CHECK1-NEXT: [[CONV10:%.*]] = fptrunc double [[ADD9]] to float +// CHECK1-NEXT: store float [[CONV10]], ptr [[ARRAYIDX7]], align 4 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX11]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP12:%.*]] = load double, ptr [[ARRAYIDX12]], align 8 +// CHECK1-NEXT: [[ADD13:%.*]] = fadd double [[TMP12]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD13]], ptr [[ARRAYIDX12]], align 8 // CHECK1-NEXT: [[TMP13:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP13]] -// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX15]], i64 3 -// CHECK1-NEXT: [[TMP14:%.*]] = load double, ptr [[ARRAYIDX16]], align 8 -// CHECK1-NEXT: [[ADD17:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD17]], ptr [[ARRAYIDX16]], align 8 +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP13]] +// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i64 3 +// CHECK1-NEXT: [[TMP14:%.*]] = load double, ptr [[ARRAYIDX15]], align 8 +// CHECK1-NEXT: [[ADD16:%.*]] = fadd double [[TMP14]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8 // CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 // CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[X]], align 8 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i64 [[TMP15]], 1 -// CHECK1-NEXT: store i64 [[ADD18]], ptr [[X]], align 8 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP15]], 1 +// CHECK1-NEXT: store i64 [[ADD17]], ptr [[X]], align 8 // CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 // CHECK1-NEXT: [[TMP16:%.*]] = load i8, ptr [[Y]], align 8 -// CHECK1-NEXT: [[CONV19:%.*]] = sext i8 [[TMP16]] to i32 -// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[CONV19]], 1 -// CHECK1-NEXT: [[CONV21:%.*]] = trunc i32 [[ADD20]] to i8 -// CHECK1-NEXT: store i8 [[CONV21]], ptr [[Y]], align 8 +// CHECK1-NEXT: [[CONV18:%.*]] = sext i8 [[TMP16]] to i32 +// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV18]], 1 +// CHECK1-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8 +// CHECK1-NEXT: store i8 [[CONV20]], ptr [[Y]], align 8 // CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(8) ptr @_ZN2TTIxcEixEi(ptr nonnull align 8 dereferenceable(16) [[TMP7]], i32 0) #[[ATTR10:[0-9]+]] // CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[CALL]], align 8 -// CHECK1-NEXT: [[ADD22:%.*]] = add nsw i64 [[TMP17]], 1 -// CHECK1-NEXT: store i64 [[ADD22]], ptr [[CALL]], align 8 -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK1-NEXT: [[ADD21:%.*]] = add nsw i64 [[TMP17]], 1 +// CHECK1-NEXT: store i64 [[ADD21]], ptr [[CALL]], align 8 +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -330,7 +330,7 @@ // CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -338,20 +338,20 @@ // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 // CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP3]] to i32 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 -// CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK1-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK1-NEXT: [[CONV6:%.*]] = sext i8 [[TMP4]] to i32 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 -// CHECK1-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK1-NEXT: store i8 [[CONV8]], ptr [[AAA_ADDR]], align 1 +// CHECK1-NEXT: [[CONV3:%.*]] = sext i8 [[TMP4]] to i32 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 +// CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 +// CHECK1-NEXT: store i8 [[CONV5]], ptr [[AAA_ADDR]], align 1 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -374,30 +374,30 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) +// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP5]] to double -// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double +// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 // CHECK1-NEXT: store double [[ADD]], ptr [[A]], align 8 -// CHECK1-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = load double, ptr [[A4]], align 8 +// CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load double, ptr [[A3]], align 8 // CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00 -// CHECK1-NEXT: store double [[INC]], ptr [[A4]], align 8 -// CHECK1-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 +// CHECK1-NEXT: store double [[INC]], ptr [[A3]], align 8 +// CHECK1-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 // CHECK1-NEXT: [[TMP7:%.*]] = mul nsw i64 1, [[TMP2]] // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP7]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK1-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2 -// CHECK1-NEXT: [[A7:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP8:%.*]] = load double, ptr [[A7]], align 8 -// CHECK1-NEXT: [[CONV8:%.*]] = fptosi double [[TMP8]] to i32 -// CHECK1-NEXT: [[A9:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3baziRd(i32 [[CONV8]], ptr nonnull align 8 dereferenceable(8) [[A9]]) #[[ATTR10]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 +// CHECK1-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2 +// CHECK1-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP8:%.*]] = load double, ptr [[A6]], align 8 +// CHECK1-NEXT: [[CONV7:%.*]] = fptosi double [[TMP8]] to i32 +// CHECK1-NEXT: [[A8:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3baziRd(i32 [[CONV7]], ptr nonnull align 8 dereferenceable(8) [[A8]]) #[[ATTR10]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -408,25 +408,25 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: [[F:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: store i32 [[F1]], ptr [[F]], align 4 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[F]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 2) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[F]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 2) +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[F]], align 4 // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[F]], i64 4) -// CHECK1-NEXT: ret i32 [[TMP7]] +// CHECK1-NEXT: ret i32 [[TMP4]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142 // CHECK1-SAME: () #[[ATTR4]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -435,7 +435,7 @@ // CHECK1: worker.exit: // CHECK1-NEXT: ret void // CHECK1: 1: -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // // @@ -449,7 +449,7 @@ // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -457,15 +457,15 @@ // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 // CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP3]] to i32 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -507,10 +507,10 @@ // CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]], ptr [[TMP8]]) #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]], ptr [[TMP6]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: ret void // // @@ -523,17 +523,17 @@ // CHECK2-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR]], align 4 // CHECK2-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[PTR1_ADDR]], ptr [[TMP3]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -562,11 +562,11 @@ // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39 // CHECK2-SAME: () #[[ATTR4:[0-9]+]] { // CHECK2-NEXT: entry: -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -577,21 +577,21 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK2-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 -// CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK2-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 +// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK2-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK2-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 // CHECK2-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK2-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 -// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 2 -// CHECK2-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK2-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2 -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK2-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 2 +// CHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK2-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -626,7 +626,7 @@ // CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) +// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -670,7 +670,7 @@ // CHECK2-NEXT: [[TMP17:%.*]] = load i64, ptr [[CALL]], align 8 // CHECK2-NEXT: [[ADD21:%.*]] = add nsw i64 [[TMP17]], 1 // CHECK2-NEXT: store i64 [[ADD21]], ptr [[CALL]], align 8 -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -700,7 +700,7 @@ // CHECK2-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -708,20 +708,20 @@ // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 // CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK2-NEXT: [[CONV2:%.*]] = sext i16 [[TMP3]] to i32 -// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 -// CHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK2-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 +// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK2-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 // CHECK2-NEXT: [[TMP4:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK2-NEXT: [[CONV5:%.*]] = sext i8 [[TMP4]] to i32 -// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 -// CHECK2-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK2-NEXT: store i8 [[CONV7]], ptr [[AAA_ADDR]], align 1 +// CHECK2-NEXT: [[CONV3:%.*]] = sext i8 [[TMP4]] to i32 +// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 +// CHECK2-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 +// CHECK2-NEXT: store i8 [[CONV5]], ptr [[AAA_ADDR]], align 1 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 // CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK2-NEXT: store i32 [[ADD8]], ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK2-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -744,7 +744,7 @@ // CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) +// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -767,7 +767,7 @@ // CHECK2-NEXT: [[CONV7:%.*]] = fptosi double [[TMP8]] to i32 // CHECK2-NEXT: [[A8:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 // CHECK2-NEXT: [[CALL:%.*]] = call i32 @_Z3baziRd(i32 [[CONV7]], ptr nonnull align 8 dereferenceable(8) [[A8]]) #[[ATTR10]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -778,25 +778,25 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: [[F:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) // CHECK2-NEXT: store i32 [[F1]], ptr [[F]], align 4 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[F]], ptr [[TMP2]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 2) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[F]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 2) +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[F]], align 4 // CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[F]], i32 4) -// CHECK2-NEXT: ret i32 [[TMP7]] +// CHECK2-NEXT: ret i32 [[TMP4]] // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142 // CHECK2-SAME: () #[[ATTR4]] { // CHECK2-NEXT: entry: -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -805,7 +805,7 @@ // CHECK2: worker.exit: // CHECK2-NEXT: ret void // CHECK2: 1: -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // // @@ -819,7 +819,7 @@ // CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -827,15 +827,15 @@ // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 // CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK2-NEXT: [[CONV1:%.*]] = sext i16 [[TMP3]] to i32 -// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 -// CHECK2-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK2-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2 +// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK2-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK2-NEXT: store i32 [[ADD4]], ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -877,9 +877,9 @@ // CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 1 -// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]], ptr [[TMP8]]) #[[ATTR2:[0-9]+]] +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 1 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]], ptr [[TMP6]]) #[[ATTR2:[0-9]+]] // CHECK2-NEXT: ret void // Index: clang/test/OpenMP/nvptx_target_parallel_codegen.cpp =================================================================== --- clang/test/OpenMP/nvptx_target_parallel_codegen.cpp +++ clang/test/OpenMP/nvptx_target_parallel_codegen.cpp @@ -58,15 +58,15 @@ // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -103,19 +103,19 @@ // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true) +// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -159,15 +159,15 @@ // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 // CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -204,19 +204,19 @@ // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true) +// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void Index: clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp =================================================================== --- clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp +++ clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp @@ -53,15 +53,15 @@ // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -100,20 +100,20 @@ // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true) +// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -157,15 +157,15 @@ // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 // CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -204,20 +204,20 @@ // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true) +// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) +// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void Index: clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp =================================================================== --- clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp +++ clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp @@ -54,8 +54,8 @@ // CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l27}}( // -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) -// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) +// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2) // // // define internal void [[PFN]]( @@ -233,8 +233,8 @@ // CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l32}}( // -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) -// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) +// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2) // // // define internal void [[PFN1]]( @@ -490,8 +490,8 @@ // CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l38}}( // -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) -// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) +// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2) // // // define internal void [[PFN2]]( Index: clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp =================================================================== --- clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp +++ clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp @@ -35,7 +35,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -43,7 +43,7 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8:![0-9]+]] // CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR6:[0-9]+]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -527,7 +527,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -535,7 +535,7 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR6]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void Index: clang/test/OpenMP/nvptx_target_printf_codegen.c =================================================================== --- clang/test/OpenMP/nvptx_target_printf_codegen.c +++ clang/test/OpenMP/nvptx_target_printf_codegen.c @@ -48,7 +48,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[FMT:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[TMP:%.*]] = alloca [[PRINTF_ARGS:%.*]], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -61,7 +61,7 @@ // CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PRINTF_ARGS]], ptr [[TMP]], i32 0, i32 2 // CHECK-64-NEXT: store double 3.000000e+00, ptr [[TMP4]], align 8 // CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__llvm_omp_vprintf(ptr [[TMP1]], ptr [[TMP]], i32 24) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -70,12 +70,12 @@ // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckNoArgs_l25 // CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__llvm_omp_vprintf(ptr @.str1, ptr null, i32 0) -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -87,7 +87,7 @@ // CHECK-64-NEXT: [[FOO_ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[TMP:%.*]] = alloca [[PRINTF_ARGS_0:%.*]], align 8 // CHECK-64-NEXT: store i64 [[FOO]], ptr [[FOO_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) +// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: @@ -102,7 +102,7 @@ // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void // CHECK-64: if.end: -// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK-64-NEXT: ret void // // @@ -114,7 +114,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[FMT:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca [[PRINTF_ARGS:%.*]], align 8 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -127,7 +127,7 @@ // CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PRINTF_ARGS]], ptr [[TMP]], i32 0, i32 2 // CHECK-32-NEXT: store double 3.000000e+00, ptr [[TMP4]], align 8 // CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__llvm_omp_vprintf(ptr [[TMP1]], ptr [[TMP]], i32 24) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -136,12 +136,12 @@ // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckNoArgs_l25 // CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__llvm_omp_vprintf(ptr @.str1, ptr null, i32 0) -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void @@ -153,7 +153,7 @@ // CHECK-32-NEXT: [[FOO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca [[PRINTF_ARGS_0:%.*]], align 8 // CHECK-32-NEXT: store i32 [[FOO]], ptr [[FOO_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) +// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: @@ -168,6 +168,6 @@ // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void // CHECK-32: if.end: -// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK-32-NEXT: ret void // Index: clang/test/OpenMP/nvptx_target_simd_codegen.cpp =================================================================== --- clang/test/OpenMP/nvptx_target_simd_codegen.cpp +++ clang/test/OpenMP/nvptx_target_simd_codegen.cpp @@ -61,32 +61,32 @@ } // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l32}}( -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-NOT: call void @__kmpc_for_static_init // CHECK-NOT: call void @__kmpc_for_static_fini -// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 true) +// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2) // CHECK: ret void // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l37}}( -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-NOT: call void @__kmpc_for_static_init // CHECK-NOT: call void @__kmpc_for_static_fini -// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 true) +// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2) // CHECK: ret void // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l42}}( -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-NOT: call void @__kmpc_for_static_init // CHECK-NOT: call void @__kmpc_for_static_fini -// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 true) +// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2) // CHECK: ret void // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l47}}( -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) // CHECK-NOT: call void @__kmpc_for_static_init // CHECK-NOT: call void @__kmpc_for_static_fini // CHECK-NOT: call void @__kmpc_nvptx_end_reduce_nowait( -// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 true) +// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2) // CHECK: ret void #endif Index: clang/test/OpenMP/nvptx_target_teams_codegen.cpp =================================================================== --- clang/test/OpenMP/nvptx_target_teams_codegen.cpp +++ clang/test/OpenMP/nvptx_target_teams_codegen.cpp @@ -57,7 +57,7 @@ // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -68,7 +68,7 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2:[0-9]+]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -95,7 +95,7 @@ // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -106,7 +106,7 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -133,18 +133,18 @@ // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 // CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -162,9 +162,9 @@ // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[AA_ADDR]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) // CHECK1-NEXT: ret void // // @@ -181,9 +181,9 @@ // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) // CHECK1-NEXT: ret void // // @@ -209,7 +209,7 @@ // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -220,7 +220,7 @@ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2:[0-9]+]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -247,7 +247,7 @@ // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -258,7 +258,7 @@ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -285,18 +285,18 @@ // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 // CHECK2-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -314,9 +314,9 @@ // CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[AA_ADDR]], ptr [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) // CHECK2-NEXT: ret void // // @@ -333,9 +333,9 @@ // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) // CHECK2-NEXT: ret void // // Index: clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp =================================================================== --- clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp +++ clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp @@ -37,7 +37,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -45,7 +45,7 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -104,8 +104,8 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: @@ -145,8 +145,8 @@ // CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]]) #[[ATTR5]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR5]] // CHECK1-NEXT: ret void // // @@ -155,7 +155,7 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -163,7 +163,7 @@ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -222,8 +222,8 @@ // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK2-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: @@ -263,7 +263,7 @@ // CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]]) #[[ATTR5]] +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR5]] // CHECK2-NEXT: ret void // Index: clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp =================================================================== --- clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp +++ clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp @@ -85,11 +85,11 @@ // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4:[0-9]+]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 @@ -99,7 +99,7 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -202,7 +202,7 @@ // CHECK1-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8 // CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5) +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 @@ -396,18 +396,18 @@ // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -501,7 +501,7 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 8 // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4) +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 @@ -645,15 +645,15 @@ // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -716,7 +716,7 @@ // CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP15]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 @@ -833,18 +833,18 @@ // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -919,7 +919,7 @@ // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 // CHECK1-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP12]] to ptr // CHECK1-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4) +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 @@ -1059,18 +1059,18 @@ // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -1180,7 +1180,7 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP27]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP29]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4) +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP29]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 @@ -1371,11 +1371,11 @@ // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 @@ -1383,7 +1383,7 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__10(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR3]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -1482,7 +1482,7 @@ // CHECK1-NEXT: store ptr [[TMP20]], ptr [[TMP28]], align 8 // CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5) +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 @@ -1635,11 +1635,11 @@ // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK2-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4:[0-9]+]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 @@ -1649,7 +1649,7 @@ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -1752,7 +1752,7 @@ // CHECK2-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8 // CHECK2-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5) +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 @@ -1946,18 +1946,18 @@ // CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -2051,7 +2051,7 @@ // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 8 // CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4) +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 @@ -2195,15 +2195,15 @@ // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -2266,7 +2266,7 @@ // CHECK2-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 // CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP15]], align 8 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 @@ -2383,18 +2383,18 @@ // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK2-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -2469,7 +2469,7 @@ // CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 // CHECK2-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP12]] to ptr // CHECK2-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4) +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 @@ -2609,18 +2609,18 @@ // CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -2729,7 +2729,7 @@ // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP29]], align 8 // CHECK2-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4) +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 @@ -2916,11 +2916,11 @@ // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK2-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 @@ -2928,7 +2928,7 @@ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__10(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR3]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -3027,7 +3027,7 @@ // CHECK2-NEXT: store ptr [[TMP20]], ptr [[TMP28]], align 8 // CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5) +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 @@ -3180,11 +3180,11 @@ // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4:[0-9]+]]) +// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 @@ -3194,7 +3194,7 @@ // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -3295,7 +3295,7 @@ // CHECK3-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 4 // CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 5) +// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 5) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: // CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 @@ -3484,18 +3484,18 @@ // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) +// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -3587,7 +3587,7 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP24]], align 4 // CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP26]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4) +// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP26]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: // CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 @@ -3727,15 +3727,15 @@ // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) +// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -3796,7 +3796,7 @@ // CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4 // CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP13]], align 4 -// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) +// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: // CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 @@ -3909,18 +3909,18 @@ // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK3-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) +// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -3993,7 +3993,7 @@ // CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 // CHECK3-NEXT: [[TMP17:%.*]] = inttoptr i32 [[TMP10]] to ptr // CHECK3-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 4 -// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4) +// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: // CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 @@ -4128,18 +4128,18 @@ // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) +// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -4251,7 +4251,7 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP29]], align 4 // CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 -// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4) +// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: // CHECK3-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 @@ -4443,11 +4443,11 @@ // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) +// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 @@ -4455,7 +4455,7 @@ // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__10(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR3]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -4552,7 +4552,7 @@ // CHECK3-NEXT: store ptr [[TMP18]], ptr [[TMP26]], align 4 // CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 5) +// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 5) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: // CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 Index: clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp =================================================================== --- clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp +++ clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp @@ -43,11 +43,11 @@ // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4:[0-9]+]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8 @@ -57,7 +57,7 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -160,7 +160,7 @@ // CHECK1-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8 // CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5) +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 @@ -341,11 +341,11 @@ // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4:[0-9]+]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4 @@ -355,7 +355,7 @@ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -456,7 +456,7 @@ // CHECK2-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 4 // CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 5) +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 5) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 Index: clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp =================================================================== --- clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp +++ clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp @@ -73,11 +73,11 @@ // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4:[0-9]+]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 @@ -87,7 +87,7 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -190,7 +190,7 @@ // CHECK1-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5), !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5), !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] @@ -408,18 +408,18 @@ // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -513,7 +513,7 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP19]] // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP19]] // CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !llvm.access.group [[ACC_GRP19]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] @@ -681,15 +681,15 @@ // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -752,7 +752,7 @@ // CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP25]] // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3), !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3), !llvm.access.group [[ACC_GRP25]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] @@ -883,18 +883,18 @@ // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -969,7 +969,7 @@ // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 // CHECK1-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP12]] to ptr // CHECK1-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !llvm.access.group [[ACC_GRP31]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] @@ -1128,11 +1128,11 @@ // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK2-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4:[0-9]+]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 @@ -1142,7 +1142,7 @@ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -1243,7 +1243,7 @@ // CHECK2-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 5), !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 5), !llvm.access.group [[ACC_GRP12]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] @@ -1456,18 +1456,18 @@ // CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -1559,7 +1559,7 @@ // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP26]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4), !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP26]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4), !llvm.access.group [[ACC_GRP19]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] @@ -1723,15 +1723,15 @@ // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -1792,7 +1792,7 @@ // CHECK2-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3), !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3), !llvm.access.group [[ACC_GRP25]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] @@ -1919,18 +1919,18 @@ // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK2-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -2003,7 +2003,7 @@ // CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 // CHECK2-NEXT: [[TMP17:%.*]] = inttoptr i32 [[TMP10]] to ptr // CHECK2-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4), !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4), !llvm.access.group [[ACC_GRP31]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: // CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] Index: clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp =================================================================== --- clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp +++ clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp @@ -70,24 +70,24 @@ } // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l37( -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) -// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) +// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2) // CHECK: call void @__kmpc_distribute_static_init_4({{.+}}, {{.+}}, {{.+}} 91, // CHECK: call void @__kmpc_distribute_static_fini( // CHECK: ret void // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l43( -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) -// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) +// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2) // CHECK: call void @__kmpc_distribute_static_init_4({{.+}}, {{.+}}, {{.+}} 91, // CHECK: call void @__kmpc_distribute_static_fini( // CHECK: ret void // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l48( -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) -// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) +// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2) // CHECK: call void @__kmpc_distribute_static_init_4({{.+}}, {{.+}}, {{.+}} 91, // CHECK: call void @__kmpc_distribute_static_fini( @@ -95,8 +95,8 @@ // CHECK: define {{.*}}void {{@__omp_offloading_.+}}_l53({{.+}}, i{{32|64}} [[F_IN:%.+]]) // CHECK: store {{.+}} [[F_IN]], ptr {{.+}}, -// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 true) -// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 true) +// CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false) +// CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2) // CHECK: store {{.+}} 99, ptr [[COMB_UB:%.+]], align // CHECK: call void @__kmpc_distribute_static_init_4({{.+}}, {{.+}}, {{.+}} 91, {{.+}}, {{.+}}, ptr [[COMB_UB]], Index: clang/test/OpenMP/nvptx_teams_codegen.cpp =================================================================== --- clang/test/OpenMP/nvptx_teams_codegen.cpp +++ clang/test/OpenMP/nvptx_teams_codegen.cpp @@ -83,7 +83,7 @@ // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -95,7 +95,7 @@ // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 4) -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -122,7 +122,7 @@ // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -134,7 +134,7 @@ // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 8) -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -161,7 +161,7 @@ // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -173,7 +173,7 @@ // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4) -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -200,7 +200,7 @@ // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -212,7 +212,7 @@ // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4) -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -243,19 +243,19 @@ // CHECK3-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK3-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK3-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK3-NEXT: [[ARGC3:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[ARGC3]], align 4 +// CHECK3-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[ARGC1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC3]]) #[[ATTR3:[0-9]+]] -// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[ARGC3]], i64 4) -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]] +// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 4) +// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -286,19 +286,19 @@ // CHECK3-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK3-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK3-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: [[ARGC2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[ARGC2]], align 8 +// CHECK3-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[ARGC1]], align 8 // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC2]]) #[[ATTR3]] -// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[ARGC2]], i64 8) -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK3-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]] +// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 8) +// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -329,7 +329,7 @@ // CHECK4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK4-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) +// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK4: user_code.entry: @@ -341,7 +341,7 @@ // CHECK4-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK4-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]] // CHECK4-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4) -// CHECK4-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK4-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK4-NEXT: ret void // CHECK4: worker.exit: // CHECK4-NEXT: ret void @@ -372,7 +372,7 @@ // CHECK4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK4-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) +// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK4: user_code.entry: @@ -384,7 +384,7 @@ // CHECK4-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK4-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]] // CHECK4-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4) -// CHECK4-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK4-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK4-NEXT: ret void // CHECK4: worker.exit: // CHECK4-NEXT: ret void Index: clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp =================================================================== --- clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp +++ clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp @@ -57,7 +57,7 @@ // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[E]], ptr [[E_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: @@ -69,7 +69,7 @@ // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[E1]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[E1]], i64 8) -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -95,14 +95,14 @@ // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[E1]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP3]], ptr [[TMP7]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func) -// CHECK1-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 1 -// CHECK1-NEXT: br i1 [[TMP9]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP3]], ptr [[TMP5]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func) +// CHECK1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 1 +// CHECK1-NEXT: br i1 [[TMP7]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK1: .omp.reduction.then: -// CHECK1-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load double, ptr [[E1]], align 8 -// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load double, ptr [[E1]], align 8 +// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[TMP8]], [[TMP9]] // CHECK1-NEXT: store double [[ADD2]], ptr [[TMP0]], align 8 // CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP3]]) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] @@ -125,51 +125,51 @@ // CHECK1-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 // CHECK1-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2 -// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2 -// CHECK1-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP11]], i64 1 -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK1-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16 -// CHECK1-NEXT: [[TMP20:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP19]]) -// CHECK1-NEXT: store i64 [[TMP20]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i64, ptr [[TMP11]], i64 1 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1 -// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK1-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2 +// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP9]], i64 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK1-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16 +// CHECK1-NEXT: [[TMP15:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]]) +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[TMP9]], i64 1 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1 +// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = icmp eq i16 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP19:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP21:%.*]] = and i1 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP22:%.*]] = icmp eq i16 [[TMP7]], 2 +// CHECK1-NEXT: [[TMP23:%.*]] = and i16 [[TMP5]], 1 +// CHECK1-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP23]], 0 +// CHECK1-NEXT: [[TMP25:%.*]] = and i1 [[TMP22]], [[TMP24]] +// CHECK1-NEXT: [[TMP26:%.*]] = icmp sgt i16 [[TMP6]], 0 // CHECK1-NEXT: [[TMP27:%.*]] = and i1 [[TMP25]], [[TMP26]] -// CHECK1-NEXT: [[TMP28:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK1-NEXT: [[TMP29:%.*]] = and i16 [[TMP6]], 1 -// CHECK1-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP29]], 0 -// CHECK1-NEXT: [[TMP31:%.*]] = and i1 [[TMP28]], [[TMP30]] -// CHECK1-NEXT: [[TMP32:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK1-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] -// CHECK1-NEXT: [[TMP34:%.*]] = or i1 [[TMP24]], [[TMP27]] -// CHECK1-NEXT: [[TMP35:%.*]] = or i1 [[TMP34]], [[TMP33]] -// CHECK1-NEXT: br i1 [[TMP35]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK1-NEXT: [[TMP28:%.*]] = or i1 [[TMP18]], [[TMP21]] +// CHECK1-NEXT: [[TMP29:%.*]] = or i1 [[TMP28]], [[TMP27]] +// CHECK1-NEXT: br i1 [[TMP29]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK1: then: // CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]] // CHECK1-NEXT: br label [[IFCONT:%.*]] // CHECK1: else: // CHECK1-NEXT: br label [[IFCONT]] // CHECK1: ifcont: -// CHECK1-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP39:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] -// CHECK1-NEXT: br i1 [[TMP40]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK1-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP31:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]] +// CHECK1-NEXT: br i1 [[TMP32]], label [[THEN4:%.*]], label [[ELSE5:%.*]] // CHECK1: then4: -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP44]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = load double, ptr [[TMP43]], align 8 -// CHECK1-NEXT: store double [[TMP47]], ptr [[TMP46]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load double, ptr [[TMP34]], align 8 +// CHECK1-NEXT: store double [[TMP37]], ptr [[TMP36]], align 8 // CHECK1-NEXT: br label [[IFCONT6:%.*]] // CHECK1: else5: // CHECK1-NEXT: br label [[IFCONT6]] @@ -195,41 +195,41 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTCNT_ADDR]], align 4 // CHECK1-NEXT: br label [[PRECOND:%.*]] // CHECK1: precond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 2 -// CHECK1-NEXT: br i1 [[TMP9]], label [[BODY:%.*]], label [[EXIT:%.*]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 2 +// CHECK1-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]] // CHECK1: body: // CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK1-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK1: then: -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i32 [[TMP8]] -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: store volatile i32 [[TMP15]], ptr addrspace(3) [[TMP14]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 [[TMP7]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: store volatile i32 [[TMP13]], ptr addrspace(3) [[TMP12]], align 4 // CHECK1-NEXT: br label [[IFCONT:%.*]] // CHECK1: else: // CHECK1-NEXT: br label [[IFCONT]] // CHECK1: ifcont: // CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP16]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]] // CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK1: then2: -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP19]], i32 [[TMP8]] -// CHECK1-NEXT: [[TMP22:%.*]] = load volatile i32, ptr addrspace(3) [[TMP17]], align 4 -// CHECK1-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]] +// CHECK1-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 // CHECK1-NEXT: br label [[IFCONT4:%.*]] // CHECK1: else3: // CHECK1-NEXT: br label [[IFCONT4]] // CHECK1: ifcont4: -// CHECK1-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[TMP23]], ptr [[DOTCNT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR]], align 4 // CHECK1-NEXT: br label [[PRECOND]] // CHECK1: exit: // CHECK1-NEXT: ret void @@ -245,14 +245,14 @@ // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP7]] -// CHECK1-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP9]], align 8 -// CHECK1-NEXT: store double [[TMP12]], ptr [[TMP11]], align 128 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP5]] +// CHECK1-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store double [[TMP9]], ptr [[TMP8]], align 128 // CHECK1-NEXT: ret void // // @@ -267,13 +267,13 @@ // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP10]]) #[[ATTR4]] +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP4]] +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP7]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -287,14 +287,14 @@ // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP7]] -// CHECK1-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP11]], align 128 -// CHECK1-NEXT: store double [[TMP12]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP5]] +// CHECK1-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP8]], align 128 +// CHECK1-NEXT: store double [[TMP9]], ptr [[TMP7]], align 8 // CHECK1-NEXT: ret void // // @@ -309,13 +309,13 @@ // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[TMP10]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP4]] +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[TMP7]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -328,23 +328,23 @@ // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: store i64 [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[C_ADDR]], align 1 -// CHECK1-NEXT: [[C2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 1) -// CHECK1-NEXT: store i8 [[TMP1]], ptr [[C2]], align 1 +// CHECK1-NEXT: [[C1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 1) +// CHECK1-NEXT: store i8 [[TMP1]], ptr [[C1]], align 1 // CHECK1-NEXT: [[TMP2:%.*]] = load float, ptr [[D_ADDR]], align 4 -// CHECK1-NEXT: [[D3:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) -// CHECK1-NEXT: store float [[TMP2]], ptr [[D3]], align 4 +// CHECK1-NEXT: [[D2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) +// CHECK1-NEXT: store float [[TMP2]], ptr [[D2]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C2]], ptr [[D3]]) #[[ATTR4]] -// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[D3]], i64 4) -// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[C2]], i64 1) -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C1]], ptr [[D2]]) #[[ATTR4]] +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i64 4) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i64 1) +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -382,21 +382,21 @@ // CHECK1-NEXT: store ptr [[C1]], ptr [[TMP6]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 // CHECK1-NEXT: store ptr [[D2]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP10]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func3, ptr @_omp_reduction_inter_warp_copy_func4, ptr @_omp_reduction_list_to_global_copy_func5, ptr @_omp_reduction_list_to_global_reduce_func6, ptr @_omp_reduction_global_to_list_copy_func7, ptr @_omp_reduction_global_to_list_reduce_func8) -// CHECK1-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP11]], 1 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP8]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func3, ptr @_omp_reduction_inter_warp_copy_func4, ptr @_omp_reduction_list_to_global_copy_func5, ptr @_omp_reduction_list_to_global_reduce_func6, ptr @_omp_reduction_global_to_list_copy_func7, ptr @_omp_reduction_global_to_list_reduce_func8) +// CHECK1-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 1 +// CHECK1-NEXT: br i1 [[TMP10]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK1: .omp.reduction.then: -// CHECK1-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK1-NEXT: [[CONV4:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK1-NEXT: [[TMP14:%.*]] = load i8, ptr [[C1]], align 1 -// CHECK1-NEXT: [[CONV5:%.*]] = sext i8 [[TMP14]] to i32 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP0]], align 1 +// CHECK1-NEXT: [[CONV4:%.*]] = sext i8 [[TMP11]] to i32 +// CHECK1-NEXT: [[TMP12:%.*]] = load i8, ptr [[C1]], align 1 +// CHECK1-NEXT: [[CONV5:%.*]] = sext i8 [[TMP12]] to i32 // CHECK1-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]] // CHECK1-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8 // CHECK1-NEXT: store i8 [[CONV7]], ptr [[TMP0]], align 1 -// CHECK1-NEXT: [[TMP15:%.*]] = load float, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load float, ptr [[D2]], align 4 -// CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr [[D2]], align 4 +// CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[TMP13]], [[TMP14]] // CHECK1-NEXT: store float [[MUL8]], ptr [[TMP1]], align 4 // CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] @@ -421,71 +421,71 @@ // CHECK1-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 // CHECK1-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2 -// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2 -// CHECK1-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP10]], i64 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP10]], align 1 -// CHECK1-NEXT: [[TMP14:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK1-NEXT: [[TMP16:%.*]] = trunc i32 [[TMP15]] to i16 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP14]], i16 [[TMP7]], i16 [[TMP16]]) -// CHECK1-NEXT: [[TMP18:%.*]] = trunc i32 [[TMP17]] to i8 -// CHECK1-NEXT: store i8 [[TMP18]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 1 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP10]], i64 1 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1 -// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr float, ptr [[TMP23]], i64 1 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK1-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 -// CHECK1-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]]) -// CHECK1-NEXT: store i32 [[TMP32]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr i32, ptr [[TMP23]], i64 1 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i64 1 -// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK1-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP38:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2 +// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP9]], i64 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP9]], align 1 +// CHECK1-NEXT: [[TMP13:%.*]] = sext i8 [[TMP12]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK1-NEXT: [[TMP15:%.*]] = trunc i32 [[TMP14]] to i16 +// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP13]], i16 [[TMP6]], i16 [[TMP15]]) +// CHECK1-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8 +// CHECK1-NEXT: store i8 [[TMP17]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 1 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP9]], i64 1 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1 +// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr float, ptr [[TMP21]], i64 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK1-NEXT: [[TMP26:%.*]] = trunc i32 [[TMP25]] to i16 +// CHECK1-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP24]], i16 [[TMP6]], i16 [[TMP26]]) +// CHECK1-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP21]], i64 1 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i64 1 +// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP7]], 2 +// CHECK1-NEXT: [[TMP35:%.*]] = and i16 [[TMP5]], 1 +// CHECK1-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP35]], 0 +// CHECK1-NEXT: [[TMP37:%.*]] = and i1 [[TMP34]], [[TMP36]] +// CHECK1-NEXT: [[TMP38:%.*]] = icmp sgt i16 [[TMP6]], 0 // CHECK1-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] -// CHECK1-NEXT: [[TMP40:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK1-NEXT: [[TMP41:%.*]] = and i16 [[TMP6]], 1 -// CHECK1-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP41]], 0 -// CHECK1-NEXT: [[TMP43:%.*]] = and i1 [[TMP40]], [[TMP42]] -// CHECK1-NEXT: [[TMP44:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK1-NEXT: [[TMP45:%.*]] = and i1 [[TMP43]], [[TMP44]] -// CHECK1-NEXT: [[TMP46:%.*]] = or i1 [[TMP36]], [[TMP39]] -// CHECK1-NEXT: [[TMP47:%.*]] = or i1 [[TMP46]], [[TMP45]] -// CHECK1-NEXT: br i1 [[TMP47]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK1-NEXT: [[TMP40:%.*]] = or i1 [[TMP30]], [[TMP33]] +// CHECK1-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]] +// CHECK1-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK1: then: // CHECK1-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]] // CHECK1-NEXT: br label [[IFCONT:%.*]] // CHECK1: else: // CHECK1-NEXT: br label [[IFCONT]] // CHECK1: ifcont: -// CHECK1-NEXT: [[TMP50:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP51:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: [[TMP52:%.*]] = and i1 [[TMP50]], [[TMP51]] -// CHECK1-NEXT: br i1 [[TMP52]], label [[THEN5:%.*]], label [[ELSE6:%.*]] +// CHECK1-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP43:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] +// CHECK1-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]] // CHECK1: then5: -// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[TMP53]], align 8 -// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP56:%.*]] = load ptr, ptr [[TMP55]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = load i8, ptr [[TMP54]], align 1 -// CHECK1-NEXT: store i8 [[TMP57]], ptr [[TMP56]], align 1 -// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP60:%.*]] = load ptr, ptr [[TMP58]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP61]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = load float, ptr [[TMP60]], align 4 -// CHECK1-NEXT: store float [[TMP64]], ptr [[TMP63]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = load i8, ptr [[TMP46]], align 1 +// CHECK1-NEXT: store i8 [[TMP49]], ptr [[TMP48]], align 1 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = load float, ptr [[TMP51]], align 4 +// CHECK1-NEXT: store float [[TMP54]], ptr [[TMP53]], align 4 // CHECK1-NEXT: br label [[IFCONT7:%.*]] // CHECK1: else6: // CHECK1-NEXT: br label [[IFCONT7]] @@ -511,25 +511,25 @@ // CHECK1-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK1: then: -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP9]], align 1 -// CHECK1-NEXT: store volatile i8 [[TMP12]], ptr addrspace(3) [[TMP10]], align 1 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +// CHECK1-NEXT: store volatile i8 [[TMP10]], ptr addrspace(3) [[TMP9]], align 1 // CHECK1-NEXT: br label [[IFCONT:%.*]] // CHECK1: else: // CHECK1-NEXT: br label [[IFCONT]] // CHECK1: ifcont: // CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]] // CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK1: then2: -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load volatile i8, ptr addrspace(3) [[TMP14]], align 1 -// CHECK1-NEXT: store i8 [[TMP18]], ptr [[TMP17]], align 1 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load volatile i8, ptr addrspace(3) [[TMP12]], align 1 +// CHECK1-NEXT: store i8 [[TMP15]], ptr [[TMP14]], align 1 // CHECK1-NEXT: br label [[IFCONT4:%.*]] // CHECK1: else3: // CHECK1-NEXT: br label [[IFCONT4]] @@ -538,25 +538,25 @@ // CHECK1-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK1-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]] // CHECK1: then6: -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: store volatile i32 [[TMP23]], ptr addrspace(3) [[TMP22]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK1-NEXT: store volatile i32 [[TMP19]], ptr addrspace(3) [[TMP18]], align 4 // CHECK1-NEXT: br label [[IFCONT8:%.*]] // CHECK1: else7: // CHECK1-NEXT: br label [[IFCONT8]] // CHECK1: ifcont8: // CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP24]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]] // CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]] // CHECK1: then10: -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = load volatile i32, ptr addrspace(3) [[TMP25]], align 4 -// CHECK1-NEXT: store i32 [[TMP29]], ptr [[TMP27]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load volatile i32, ptr addrspace(3) [[TMP21]], align 4 +// CHECK1-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4 // CHECK1-NEXT: br label [[IFCONT12:%.*]] // CHECK1: else11: // CHECK1-NEXT: br label [[IFCONT12]] @@ -574,20 +574,20 @@ // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP7]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 -// CHECK1-NEXT: store i8 [[TMP11]], ptr [[TMP10]], align 128 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP5]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP7]] -// CHECK1-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP13]], align 4 -// CHECK1-NEXT: store float [[TMP16]], ptr [[TMP15]], align 128 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP5]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1 +// CHECK1-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 128 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP5]] +// CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP11]], align 4 +// CHECK1-NEXT: store float [[TMP13]], ptr [[TMP12]], align 128 // CHECK1-NEXT: ret void // // @@ -602,17 +602,17 @@ // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP4]] +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 // CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP12]]) #[[ATTR4]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP4]] +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP9]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -626,20 +626,20 @@ // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP7]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 128 -// CHECK1-NEXT: store i8 [[TMP11]], ptr [[TMP9]], align 1 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP5]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP7]] -// CHECK1-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 128 -// CHECK1-NEXT: store float [[TMP16]], ptr [[TMP13]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP5]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 128 +// CHECK1-NEXT: store i8 [[TMP9]], ptr [[TMP7]], align 1 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP5]] +// CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 128 +// CHECK1-NEXT: store float [[TMP13]], ptr [[TMP11]], align 4 // CHECK1-NEXT: ret void // // @@ -654,17 +654,17 @@ // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP4]] +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 // CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[TMP12]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP4]] +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[TMP9]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -677,15 +677,15 @@ // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true) +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]]) #[[ATTR4]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: // CHECK1-NEXT: ret void @@ -712,40 +712,40 @@ // CHECK1-NEXT: store i16 -32768, ptr [[B2]], align 2 // CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[A1]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[B2]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[A1]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[B2]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[TMP14]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func15, ptr @_omp_reduction_inter_warp_copy_func16, ptr @_omp_reduction_list_to_global_copy_func17, ptr @_omp_reduction_list_to_global_reduce_func18, ptr @_omp_reduction_global_to_list_copy_func19, ptr @_omp_reduction_global_to_list_reduce_func20) -// CHECK1-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 1 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[B2]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[A1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[B2]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP8]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func15, ptr @_omp_reduction_inter_warp_copy_func16, ptr @_omp_reduction_list_to_global_copy_func17, ptr @_omp_reduction_list_to_global_reduce_func18, ptr @_omp_reduction_global_to_list_copy_func19, ptr @_omp_reduction_global_to_list_reduce_func20) +// CHECK1-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 1 +// CHECK1-NEXT: br i1 [[TMP10]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK1: .omp.reduction.then: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK1-NEXT: [[OR:%.*]] = or i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[A1]], align 4 +// CHECK1-NEXT: [[OR:%.*]] = or i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: store i32 [[OR]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP19]] to i32 -// CHECK1-NEXT: [[TMP20:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP20]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i16, ptr [[B2]], align 2 +// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 // CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV3]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP21:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK1-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP1]], align 2 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP22:%.*]] = load i16, ptr [[B2]], align 2 +// CHECK1-NEXT: [[TMP16:%.*]] = load i16, ptr [[B2]], align 2 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i16 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i16 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK1-NEXT: store i16 [[COND]], ptr [[TMP1]], align 2 -// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP7]]) +// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK1: .omp.reduction.done: // CHECK1-NEXT: ret void @@ -790,30 +790,30 @@ // CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[A1]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[B2]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB3]], i32 [[TMP6]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func12, ptr @_omp_reduction_inter_warp_copy_func13) -// CHECK1-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 -// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[B2]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP6]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func12, ptr @_omp_reduction_inter_warp_copy_func13) +// CHECK1-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 1 +// CHECK1-NEXT: br i1 [[TMP10]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK1: .omp.reduction.then: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK1-NEXT: [[OR5:%.*]] = or i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[A1]], align 4 +// CHECK1-NEXT: [[OR5:%.*]] = or i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: store i32 [[OR5]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK1-NEXT: [[CONV6:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK1-NEXT: [[TMP17:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK1-NEXT: [[CONV7:%.*]] = sext i16 [[TMP17]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK1-NEXT: [[CONV6:%.*]] = sext i16 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i16, ptr [[B2]], align 2 +// CHECK1-NEXT: [[CONV7:%.*]] = sext i16 [[TMP14]] to i32 // CHECK1-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]] // CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] // CHECK1: cond.true9: -// CHECK1-NEXT: [[TMP18:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK1-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP1]], align 2 // CHECK1-NEXT: br label [[COND_END11:%.*]] // CHECK1: cond.false10: -// CHECK1-NEXT: [[TMP19:%.*]] = load i16, ptr [[B2]], align 2 +// CHECK1-NEXT: [[TMP16:%.*]] = load i16, ptr [[B2]], align 2 // CHECK1-NEXT: br label [[COND_END11]] // CHECK1: cond.end11: -// CHECK1-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP18]], [[COND_TRUE9]] ], [ [[TMP19]], [[COND_FALSE10]] ] +// CHECK1-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP15]], [[COND_TRUE9]] ], [ [[TMP16]], [[COND_FALSE10]] ] // CHECK1-NEXT: store i16 [[COND12]], ptr [[TMP1]], align 2 // CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP6]]) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] @@ -836,71 +836,71 @@ // CHECK1-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 // CHECK1-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2 -// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2 -// CHECK1-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i64 1 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK1-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16 -// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP17]]) -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP11]], i64 1 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1 -// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr i16, ptr [[TMP24]], i64 1 -// CHECK1-NEXT: [[TMP28:%.*]] = load i16, ptr [[TMP24]], align 2 -// CHECK1-NEXT: [[TMP29:%.*]] = sext i16 [[TMP28]] to i32 -// CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK1-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 -// CHECK1-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]]) -// CHECK1-NEXT: [[TMP33:%.*]] = trunc i32 [[TMP32]] to i16 -// CHECK1-NEXT: store i16 [[TMP33]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr i16, ptr [[TMP24]], i64 1 -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i64 1 -// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK1-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP39:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] -// CHECK1-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK1-NEXT: [[TMP42:%.*]] = and i16 [[TMP6]], 1 -// CHECK1-NEXT: [[TMP43:%.*]] = icmp eq i16 [[TMP42]], 0 -// CHECK1-NEXT: [[TMP44:%.*]] = and i1 [[TMP41]], [[TMP43]] -// CHECK1-NEXT: [[TMP45:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK1-NEXT: [[TMP46:%.*]] = and i1 [[TMP44]], [[TMP45]] -// CHECK1-NEXT: [[TMP47:%.*]] = or i1 [[TMP37]], [[TMP40]] -// CHECK1-NEXT: [[TMP48:%.*]] = or i1 [[TMP47]], [[TMP46]] -// CHECK1-NEXT: br i1 [[TMP48]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2 +// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP9]], i64 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK1-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16 +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]]) +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i64 1 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1 +// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[TMP19]], i64 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP19]], align 2 +// CHECK1-NEXT: [[TMP23:%.*]] = sext i16 [[TMP22]] to i32 +// CHECK1-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK1-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16 +// CHECK1-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP23]], i16 [[TMP6]], i16 [[TMP25]]) +// CHECK1-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16 +// CHECK1-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[TMP19]], i64 1 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i64 1 +// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP7]], 2 +// CHECK1-NEXT: [[TMP35:%.*]] = and i16 [[TMP5]], 1 +// CHECK1-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP35]], 0 +// CHECK1-NEXT: [[TMP37:%.*]] = and i1 [[TMP34]], [[TMP36]] +// CHECK1-NEXT: [[TMP38:%.*]] = icmp sgt i16 [[TMP6]], 0 +// CHECK1-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: [[TMP40:%.*]] = or i1 [[TMP30]], [[TMP33]] +// CHECK1-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]] +// CHECK1-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK1: then: // CHECK1-NEXT: call void @"_omp$reduction$reduction_func11"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]] // CHECK1-NEXT: br label [[IFCONT:%.*]] // CHECK1: else: // CHECK1-NEXT: br label [[IFCONT]] // CHECK1: ifcont: -// CHECK1-NEXT: [[TMP51:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP52:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: [[TMP53:%.*]] = and i1 [[TMP51]], [[TMP52]] -// CHECK1-NEXT: br i1 [[TMP53]], label [[THEN5:%.*]], label [[ELSE6:%.*]] +// CHECK1-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP43:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] +// CHECK1-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]] // CHECK1: then5: -// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP56:%.*]] = load ptr, ptr [[TMP54]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP59:%.*]] = load ptr, ptr [[TMP57]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[TMP56]], align 4 -// CHECK1-NEXT: store i32 [[TMP60]], ptr [[TMP59]], align 4 -// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP61]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP66:%.*]] = load ptr, ptr [[TMP64]], align 8 -// CHECK1-NEXT: [[TMP67:%.*]] = load i16, ptr [[TMP63]], align 2 -// CHECK1-NEXT: store i16 [[TMP67]], ptr [[TMP66]], align 2 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK1-NEXT: store i32 [[TMP49]], ptr [[TMP48]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = load i16, ptr [[TMP51]], align 2 +// CHECK1-NEXT: store i16 [[TMP54]], ptr [[TMP53]], align 2 // CHECK1-NEXT: br label [[IFCONT7:%.*]] // CHECK1: else6: // CHECK1-NEXT: br label [[IFCONT7]] @@ -913,7 +913,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() @@ -922,56 +922,56 @@ // CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK1-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 // CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK1-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK1: then: -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK1-NEXT: store volatile i32 [[TMP12]], ptr addrspace(3) [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store volatile i32 [[TMP10]], ptr addrspace(3) [[TMP9]], align 4 // CHECK1-NEXT: br label [[IFCONT:%.*]] // CHECK1: else: // CHECK1-NEXT: br label [[IFCONT]] // CHECK1: ifcont: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]] +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]] // CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK1: then2: -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load volatile i32, ptr addrspace(3) [[TMP14]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[TMP16]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load volatile i32, ptr addrspace(3) [[TMP12]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 // CHECK1-NEXT: br label [[IFCONT4:%.*]] // CHECK1: else3: // CHECK1-NEXT: br label [[IFCONT4]] // CHECK1: ifcont4: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK1-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK1-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]] // CHECK1: then6: -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i16, ptr [[TMP20]], align 2 -// CHECK1-NEXT: store volatile i16 [[TMP24]], ptr addrspace(3) [[TMP22]], align 2 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP17]], align 2 +// CHECK1-NEXT: store volatile i16 [[TMP19]], ptr addrspace(3) [[TMP18]], align 2 // CHECK1-NEXT: br label [[IFCONT8:%.*]] // CHECK1: else7: // CHECK1-NEXT: br label [[IFCONT8]] // CHECK1: ifcont8: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP25]] +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]] // CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]] // CHECK1: then10: -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = load volatile i16, ptr addrspace(3) [[TMP26]], align 2 -// CHECK1-NEXT: store i16 [[TMP31]], ptr [[TMP29]], align 2 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load volatile i16, ptr addrspace(3) [[TMP21]], align 2 +// CHECK1-NEXT: store i16 [[TMP24]], ptr [[TMP23]], align 2 // CHECK1-NEXT: br label [[IFCONT12:%.*]] // CHECK1: else11: // CHECK1-NEXT: br label [[IFCONT12]] @@ -994,71 +994,71 @@ // CHECK1-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 // CHECK1-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2 -// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2 -// CHECK1-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i64 1 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK1-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16 -// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP17]]) -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP11]], i64 1 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1 -// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr i16, ptr [[TMP24]], i64 1 -// CHECK1-NEXT: [[TMP28:%.*]] = load i16, ptr [[TMP24]], align 2 -// CHECK1-NEXT: [[TMP29:%.*]] = sext i16 [[TMP28]] to i32 -// CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK1-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 -// CHECK1-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]]) -// CHECK1-NEXT: [[TMP33:%.*]] = trunc i32 [[TMP32]] to i16 -// CHECK1-NEXT: store i16 [[TMP33]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr i16, ptr [[TMP24]], i64 1 -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i64 1 -// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK1-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP39:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] -// CHECK1-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK1-NEXT: [[TMP42:%.*]] = and i16 [[TMP6]], 1 -// CHECK1-NEXT: [[TMP43:%.*]] = icmp eq i16 [[TMP42]], 0 -// CHECK1-NEXT: [[TMP44:%.*]] = and i1 [[TMP41]], [[TMP43]] -// CHECK1-NEXT: [[TMP45:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK1-NEXT: [[TMP46:%.*]] = and i1 [[TMP44]], [[TMP45]] -// CHECK1-NEXT: [[TMP47:%.*]] = or i1 [[TMP37]], [[TMP40]] -// CHECK1-NEXT: [[TMP48:%.*]] = or i1 [[TMP47]], [[TMP46]] -// CHECK1-NEXT: br i1 [[TMP48]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2 +// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP9]], i64 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK1-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16 +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]]) +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i64 1 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1 +// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[TMP19]], i64 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP19]], align 2 +// CHECK1-NEXT: [[TMP23:%.*]] = sext i16 [[TMP22]] to i32 +// CHECK1-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK1-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16 +// CHECK1-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP23]], i16 [[TMP6]], i16 [[TMP25]]) +// CHECK1-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16 +// CHECK1-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[TMP19]], i64 1 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i64 1 +// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP7]], 2 +// CHECK1-NEXT: [[TMP35:%.*]] = and i16 [[TMP5]], 1 +// CHECK1-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP35]], 0 +// CHECK1-NEXT: [[TMP37:%.*]] = and i1 [[TMP34]], [[TMP36]] +// CHECK1-NEXT: [[TMP38:%.*]] = icmp sgt i16 [[TMP6]], 0 +// CHECK1-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: [[TMP40:%.*]] = or i1 [[TMP30]], [[TMP33]] +// CHECK1-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]] +// CHECK1-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK1: then: // CHECK1-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]] // CHECK1-NEXT: br label [[IFCONT:%.*]] // CHECK1: else: // CHECK1-NEXT: br label [[IFCONT]] // CHECK1: ifcont: -// CHECK1-NEXT: [[TMP51:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP52:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: [[TMP53:%.*]] = and i1 [[TMP51]], [[TMP52]] -// CHECK1-NEXT: br i1 [[TMP53]], label [[THEN5:%.*]], label [[ELSE6:%.*]] +// CHECK1-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP43:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] +// CHECK1-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]] // CHECK1: then5: -// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP56:%.*]] = load ptr, ptr [[TMP54]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP59:%.*]] = load ptr, ptr [[TMP57]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[TMP56]], align 4 -// CHECK1-NEXT: store i32 [[TMP60]], ptr [[TMP59]], align 4 -// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP61]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP66:%.*]] = load ptr, ptr [[TMP64]], align 8 -// CHECK1-NEXT: [[TMP67:%.*]] = load i16, ptr [[TMP63]], align 2 -// CHECK1-NEXT: store i16 [[TMP67]], ptr [[TMP66]], align 2 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK1-NEXT: store i32 [[TMP49]], ptr [[TMP48]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = load i16, ptr [[TMP51]], align 2 +// CHECK1-NEXT: store i16 [[TMP54]], ptr [[TMP53]], align 2 // CHECK1-NEXT: br label [[IFCONT7:%.*]] // CHECK1: else6: // CHECK1-NEXT: br label [[IFCONT7]] @@ -1071,7 +1071,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() @@ -1080,56 +1080,56 @@ // CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK1-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 // CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK1-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK1: then: -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK1-NEXT: store volatile i32 [[TMP12]], ptr addrspace(3) [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store volatile i32 [[TMP10]], ptr addrspace(3) [[TMP9]], align 4 // CHECK1-NEXT: br label [[IFCONT:%.*]] // CHECK1: else: // CHECK1-NEXT: br label [[IFCONT]] // CHECK1: ifcont: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]] +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]] // CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK1: then2: -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load volatile i32, ptr addrspace(3) [[TMP14]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[TMP16]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load volatile i32, ptr addrspace(3) [[TMP12]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 // CHECK1-NEXT: br label [[IFCONT4:%.*]] // CHECK1: else3: // CHECK1-NEXT: br label [[IFCONT4]] // CHECK1: ifcont4: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK1-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK1-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]] // CHECK1: then6: -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i16, ptr [[TMP20]], align 2 -// CHECK1-NEXT: store volatile i16 [[TMP24]], ptr addrspace(3) [[TMP22]], align 2 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP17]], align 2 +// CHECK1-NEXT: store volatile i16 [[TMP19]], ptr addrspace(3) [[TMP18]], align 2 // CHECK1-NEXT: br label [[IFCONT8:%.*]] // CHECK1: else7: // CHECK1-NEXT: br label [[IFCONT8]] // CHECK1: ifcont8: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP25]] +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]] // CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]] // CHECK1: then10: -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = load volatile i16, ptr addrspace(3) [[TMP26]], align 2 -// CHECK1-NEXT: store i16 [[TMP31]], ptr [[TMP29]], align 2 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load volatile i16, ptr addrspace(3) [[TMP21]], align 2 +// CHECK1-NEXT: store i16 [[TMP24]], ptr [[TMP23]], align 2 // CHECK1-NEXT: br label [[IFCONT12:%.*]] // CHECK1: else11: // CHECK1-NEXT: br label [[IFCONT12]] @@ -1147,20 +1147,20 @@ // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP7]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 128 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP5]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP7]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP14]], align 2 -// CHECK1-NEXT: store i16 [[TMP17]], ptr [[TMP16]], align 128 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP5]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 128 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP5]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP11]], align 2 +// CHECK1-NEXT: store i16 [[TMP13]], ptr [[TMP12]], align 128 // CHECK1-NEXT: ret void // // @@ -1175,17 +1175,17 @@ // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP4]] +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 // CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP13]]) #[[ATTR4]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP4]] +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP9]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -1199,20 +1199,20 @@ // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP7]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 128 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[TMP9]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP5]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP7]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP16]], align 128 -// CHECK1-NEXT: store i16 [[TMP17]], ptr [[TMP14]], align 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP5]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 128 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP5]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 128 +// CHECK1-NEXT: store i16 [[TMP13]], ptr [[TMP11]], align 2 // CHECK1-NEXT: ret void // // @@ -1227,17 +1227,17 @@ // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP4]] +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 // CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[TMP13]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP4]] +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[TMP9]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -1250,7 +1250,7 @@ // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: @@ -1260,7 +1260,7 @@ // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[E1]]) #[[ATTR4:[0-9]+]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -1286,14 +1286,14 @@ // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[E1]], ptr [[TMP4]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP3]], ptr [[TMP7]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func) -// CHECK2-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 1 -// CHECK2-NEXT: br i1 [[TMP9]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP3]], ptr [[TMP5]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func) +// CHECK2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 1 +// CHECK2-NEXT: br i1 [[TMP7]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK2: .omp.reduction.then: -// CHECK2-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP0]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = load double, ptr [[E1]], align 8 -// CHECK2-NEXT: [[ADD2:%.*]] = fadd double [[TMP10]], [[TMP11]] +// CHECK2-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP0]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load double, ptr [[E1]], align 8 +// CHECK2-NEXT: [[ADD2:%.*]] = fadd double [[TMP8]], [[TMP9]] // CHECK2-NEXT: store double [[ADD2]], ptr [[TMP0]], align 8 // CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP3]]) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] @@ -1316,51 +1316,51 @@ // CHECK2-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 // CHECK2-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 // CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2 -// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2 -// CHECK2-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP11]], i32 1 -// CHECK2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP11]], align 8 -// CHECK2-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK2-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16 -// CHECK2-NEXT: [[TMP20:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP19]]) -// CHECK2-NEXT: store i64 [[TMP20]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr i64, ptr [[TMP11]], i32 1 -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 -// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP12]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK2-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK2-NEXT: [[TMP26:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2 +// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP9]], i32 1 +// CHECK2-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP9]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK2-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16 +// CHECK2-NEXT: [[TMP15:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]]) +// CHECK2-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[TMP9]], i32 1 +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = icmp eq i16 [[TMP7]], 0 +// CHECK2-NEXT: [[TMP19:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK2-NEXT: [[TMP20:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] +// CHECK2-NEXT: [[TMP21:%.*]] = and i1 [[TMP19]], [[TMP20]] +// CHECK2-NEXT: [[TMP22:%.*]] = icmp eq i16 [[TMP7]], 2 +// CHECK2-NEXT: [[TMP23:%.*]] = and i16 [[TMP5]], 1 +// CHECK2-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP23]], 0 +// CHECK2-NEXT: [[TMP25:%.*]] = and i1 [[TMP22]], [[TMP24]] +// CHECK2-NEXT: [[TMP26:%.*]] = icmp sgt i16 [[TMP6]], 0 // CHECK2-NEXT: [[TMP27:%.*]] = and i1 [[TMP25]], [[TMP26]] -// CHECK2-NEXT: [[TMP28:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK2-NEXT: [[TMP29:%.*]] = and i16 [[TMP6]], 1 -// CHECK2-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP29]], 0 -// CHECK2-NEXT: [[TMP31:%.*]] = and i1 [[TMP28]], [[TMP30]] -// CHECK2-NEXT: [[TMP32:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK2-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] -// CHECK2-NEXT: [[TMP34:%.*]] = or i1 [[TMP24]], [[TMP27]] -// CHECK2-NEXT: [[TMP35:%.*]] = or i1 [[TMP34]], [[TMP33]] -// CHECK2-NEXT: br i1 [[TMP35]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK2-NEXT: [[TMP28:%.*]] = or i1 [[TMP18]], [[TMP21]] +// CHECK2-NEXT: [[TMP29:%.*]] = or i1 [[TMP28]], [[TMP27]] +// CHECK2-NEXT: br i1 [[TMP29]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK2: then: // CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]] // CHECK2-NEXT: br label [[IFCONT:%.*]] // CHECK2: else: // CHECK2-NEXT: br label [[IFCONT]] // CHECK2: ifcont: -// CHECK2-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK2-NEXT: [[TMP39:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK2-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] -// CHECK2-NEXT: br i1 [[TMP40]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK2-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK2-NEXT: [[TMP31:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] +// CHECK2-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]] +// CHECK2-NEXT: br i1 [[TMP32]], label [[THEN4:%.*]], label [[ELSE5:%.*]] // CHECK2: then4: -// CHECK2-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP41]], align 4 -// CHECK2-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP44]], align 4 -// CHECK2-NEXT: [[TMP47:%.*]] = load double, ptr [[TMP43]], align 8 -// CHECK2-NEXT: store double [[TMP47]], ptr [[TMP46]], align 8 +// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP35]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load double, ptr [[TMP34]], align 8 +// CHECK2-NEXT: store double [[TMP37]], ptr [[TMP36]], align 8 // CHECK2-NEXT: br label [[IFCONT6:%.*]] // CHECK2: else5: // CHECK2-NEXT: br label [[IFCONT6]] @@ -1386,41 +1386,41 @@ // CHECK2-NEXT: store i32 0, ptr [[DOTCNT_ADDR]], align 4 // CHECK2-NEXT: br label [[PRECOND:%.*]] // CHECK2: precond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 2 -// CHECK2-NEXT: br i1 [[TMP9]], label [[BODY:%.*]], label [[EXIT:%.*]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 2 +// CHECK2-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]] // CHECK2: body: // CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK2-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK2-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK2: then: -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i32 [[TMP8]] -// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK2-NEXT: store volatile i32 [[TMP15]], ptr addrspace(3) [[TMP14]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 [[TMP7]] +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK2-NEXT: store volatile i32 [[TMP13]], ptr addrspace(3) [[TMP12]], align 4 // CHECK2-NEXT: br label [[IFCONT:%.*]] // CHECK2: else: // CHECK2-NEXT: br label [[IFCONT]] // CHECK2: ifcont: // CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP16]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]] // CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK2: then2: -// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP19]], i32 [[TMP8]] -// CHECK2-NEXT: [[TMP22:%.*]] = load volatile i32, ptr addrspace(3) [[TMP17]], align 4 -// CHECK2-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]] +// CHECK2-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4 +// CHECK2-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 // CHECK2-NEXT: br label [[IFCONT4:%.*]] // CHECK2: else3: // CHECK2-NEXT: br label [[IFCONT4]] // CHECK2: ifcont4: -// CHECK2-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK2-NEXT: store i32 [[TMP23]], ptr [[DOTCNT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK2-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR]], align 4 // CHECK2-NEXT: br label [[PRECOND]] // CHECK2: exit: // CHECK2-NEXT: ret void @@ -1436,14 +1436,14 @@ // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP7]] -// CHECK2-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP9]], align 8 -// CHECK2-NEXT: store double [[TMP12]], ptr [[TMP11]], align 128 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP5]] +// CHECK2-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP7]], align 8 +// CHECK2-NEXT: store double [[TMP9]], ptr [[TMP8]], align 128 // CHECK2-NEXT: ret void // // @@ -1458,13 +1458,13 @@ // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 // CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP10]]) #[[ATTR4]] +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP4]] +// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP7]]) #[[ATTR4]] // CHECK2-NEXT: ret void // // @@ -1478,14 +1478,14 @@ // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP7]] -// CHECK2-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP11]], align 128 -// CHECK2-NEXT: store double [[TMP12]], ptr [[TMP9]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP5]] +// CHECK2-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP8]], align 128 +// CHECK2-NEXT: store double [[TMP9]], ptr [[TMP7]], align 8 // CHECK2-NEXT: ret void // // @@ -1500,13 +1500,13 @@ // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 // CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[TMP10]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP4]] +// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[TMP7]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] // CHECK2-NEXT: ret void // // @@ -1519,23 +1519,23 @@ // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4 // CHECK2-NEXT: store i32 [[D]], ptr [[D_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = load i8, ptr [[C_ADDR]], align 1 -// CHECK2-NEXT: [[C2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 1) -// CHECK2-NEXT: store i8 [[TMP1]], ptr [[C2]], align 1 +// CHECK2-NEXT: [[C1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 1) +// CHECK2-NEXT: store i8 [[TMP1]], ptr [[C1]], align 1 // CHECK2-NEXT: [[TMP2:%.*]] = load float, ptr [[D_ADDR]], align 4 -// CHECK2-NEXT: [[D3:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) -// CHECK2-NEXT: store float [[TMP2]], ptr [[D3]], align 4 +// CHECK2-NEXT: [[D2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: store float [[TMP2]], ptr [[D2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C2]], ptr [[D3]]) #[[ATTR4]] -// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[D3]], i32 4) -// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[C2]], i32 1) -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C1]], ptr [[D2]]) #[[ATTR4]] +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i32 4) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i32 1) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -1573,21 +1573,21 @@ // CHECK2-NEXT: store ptr [[C1]], ptr [[TMP6]], align 4 // CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 // CHECK2-NEXT: store ptr [[D2]], ptr [[TMP7]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP10]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func3, ptr @_omp_reduction_inter_warp_copy_func4, ptr @_omp_reduction_list_to_global_copy_func5, ptr @_omp_reduction_list_to_global_reduce_func6, ptr @_omp_reduction_global_to_list_copy_func7, ptr @_omp_reduction_global_to_list_reduce_func8) -// CHECK2-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP11]], 1 -// CHECK2-NEXT: br i1 [[TMP12]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP8]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func3, ptr @_omp_reduction_inter_warp_copy_func4, ptr @_omp_reduction_list_to_global_copy_func5, ptr @_omp_reduction_list_to_global_reduce_func6, ptr @_omp_reduction_global_to_list_copy_func7, ptr @_omp_reduction_global_to_list_reduce_func8) +// CHECK2-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 1 +// CHECK2-NEXT: br i1 [[TMP10]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK2: .omp.reduction.then: -// CHECK2-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK2-NEXT: [[CONV4:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK2-NEXT: [[TMP14:%.*]] = load i8, ptr [[C1]], align 1 -// CHECK2-NEXT: [[CONV5:%.*]] = sext i8 [[TMP14]] to i32 +// CHECK2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP0]], align 1 +// CHECK2-NEXT: [[CONV4:%.*]] = sext i8 [[TMP11]] to i32 +// CHECK2-NEXT: [[TMP12:%.*]] = load i8, ptr [[C1]], align 1 +// CHECK2-NEXT: [[CONV5:%.*]] = sext i8 [[TMP12]] to i32 // CHECK2-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]] // CHECK2-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8 // CHECK2-NEXT: store i8 [[CONV7]], ptr [[TMP0]], align 1 -// CHECK2-NEXT: [[TMP15:%.*]] = load float, ptr [[TMP1]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load float, ptr [[D2]], align 4 -// CHECK2-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP16]] +// CHECK2-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load float, ptr [[D2]], align 4 +// CHECK2-NEXT: [[MUL8:%.*]] = fmul float [[TMP13]], [[TMP14]] // CHECK2-NEXT: store float [[MUL8]], ptr [[TMP1]], align 4 // CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] @@ -1612,71 +1612,71 @@ // CHECK2-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 // CHECK2-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 // CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2 -// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2 -// CHECK2-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP10]], i32 1 -// CHECK2-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP10]], align 1 -// CHECK2-NEXT: [[TMP14:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK2-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK2-NEXT: [[TMP16:%.*]] = trunc i32 [[TMP15]] to i16 -// CHECK2-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP14]], i16 [[TMP7]], i16 [[TMP16]]) -// CHECK2-NEXT: [[TMP18:%.*]] = trunc i32 [[TMP17]] to i8 -// CHECK2-NEXT: store i8 [[TMP18]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 1 -// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP10]], i32 1 -// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 -// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP11]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP21]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr float, ptr [[TMP23]], i32 1 -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK2-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK2-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 -// CHECK2-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]]) -// CHECK2-NEXT: store i32 [[TMP32]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 4 -// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr i32, ptr [[TMP23]], i32 1 -// CHECK2-NEXT: [[TMP34:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 -// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP24]], align 4 -// CHECK2-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK2-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK2-NEXT: [[TMP38:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2 +// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP9]], i32 1 +// CHECK2-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP9]], align 1 +// CHECK2-NEXT: [[TMP13:%.*]] = sext i8 [[TMP12]] to i32 +// CHECK2-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK2-NEXT: [[TMP15:%.*]] = trunc i32 [[TMP14]] to i16 +// CHECK2-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP13]], i16 [[TMP6]], i16 [[TMP15]]) +// CHECK2-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8 +// CHECK2-NEXT: store i8 [[TMP17]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 1 +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP9]], i32 1 +// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr float, ptr [[TMP21]], i32 1 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK2-NEXT: [[TMP26:%.*]] = trunc i32 [[TMP25]] to i16 +// CHECK2-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP24]], i16 [[TMP6]], i16 [[TMP26]]) +// CHECK2-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP21]], i32 1 +// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 +// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP22]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0 +// CHECK2-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK2-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] +// CHECK2-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP7]], 2 +// CHECK2-NEXT: [[TMP35:%.*]] = and i16 [[TMP5]], 1 +// CHECK2-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP35]], 0 +// CHECK2-NEXT: [[TMP37:%.*]] = and i1 [[TMP34]], [[TMP36]] +// CHECK2-NEXT: [[TMP38:%.*]] = icmp sgt i16 [[TMP6]], 0 // CHECK2-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] -// CHECK2-NEXT: [[TMP40:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK2-NEXT: [[TMP41:%.*]] = and i16 [[TMP6]], 1 -// CHECK2-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP41]], 0 -// CHECK2-NEXT: [[TMP43:%.*]] = and i1 [[TMP40]], [[TMP42]] -// CHECK2-NEXT: [[TMP44:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK2-NEXT: [[TMP45:%.*]] = and i1 [[TMP43]], [[TMP44]] -// CHECK2-NEXT: [[TMP46:%.*]] = or i1 [[TMP36]], [[TMP39]] -// CHECK2-NEXT: [[TMP47:%.*]] = or i1 [[TMP46]], [[TMP45]] -// CHECK2-NEXT: br i1 [[TMP47]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK2-NEXT: [[TMP40:%.*]] = or i1 [[TMP30]], [[TMP33]] +// CHECK2-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]] +// CHECK2-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK2: then: // CHECK2-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]] // CHECK2-NEXT: br label [[IFCONT:%.*]] // CHECK2: else: // CHECK2-NEXT: br label [[IFCONT]] // CHECK2: ifcont: -// CHECK2-NEXT: [[TMP50:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK2-NEXT: [[TMP51:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK2-NEXT: [[TMP52:%.*]] = and i1 [[TMP50]], [[TMP51]] -// CHECK2-NEXT: br i1 [[TMP52]], label [[THEN5:%.*]], label [[ELSE6:%.*]] +// CHECK2-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK2-NEXT: [[TMP43:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] +// CHECK2-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] +// CHECK2-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]] // CHECK2: then5: -// CHECK2-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP54:%.*]] = load ptr, ptr [[TMP53]], align 4 -// CHECK2-NEXT: [[TMP55:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP56:%.*]] = load ptr, ptr [[TMP55]], align 4 -// CHECK2-NEXT: [[TMP57:%.*]] = load i8, ptr [[TMP54]], align 1 -// CHECK2-NEXT: store i8 [[TMP57]], ptr [[TMP56]], align 1 -// CHECK2-NEXT: [[TMP58:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP60:%.*]] = load ptr, ptr [[TMP58]], align 4 -// CHECK2-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP61]], align 4 -// CHECK2-NEXT: [[TMP64:%.*]] = load float, ptr [[TMP60]], align 4 -// CHECK2-NEXT: store float [[TMP64]], ptr [[TMP63]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 4 +// CHECK2-NEXT: [[TMP49:%.*]] = load i8, ptr [[TMP46]], align 1 +// CHECK2-NEXT: store i8 [[TMP49]], ptr [[TMP48]], align 1 +// CHECK2-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 4 +// CHECK2-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 4 +// CHECK2-NEXT: [[TMP54:%.*]] = load float, ptr [[TMP51]], align 4 +// CHECK2-NEXT: store float [[TMP54]], ptr [[TMP53]], align 4 // CHECK2-NEXT: br label [[IFCONT7:%.*]] // CHECK2: else6: // CHECK2-NEXT: br label [[IFCONT7]] @@ -1702,25 +1702,25 @@ // CHECK2-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK2-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK2: then: -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK2-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP9]], align 1 -// CHECK2-NEXT: store volatile i8 [[TMP12]], ptr addrspace(3) [[TMP10]], align 1 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +// CHECK2-NEXT: store volatile i8 [[TMP10]], ptr addrspace(3) [[TMP9]], align 1 // CHECK2-NEXT: br label [[IFCONT:%.*]] // CHECK2: else: // CHECK2-NEXT: br label [[IFCONT]] // CHECK2: ifcont: // CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]] // CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK2: then2: -// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load volatile i8, ptr addrspace(3) [[TMP14]], align 1 -// CHECK2-NEXT: store i8 [[TMP18]], ptr [[TMP17]], align 1 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load volatile i8, ptr addrspace(3) [[TMP12]], align 1 +// CHECK2-NEXT: store i8 [[TMP15]], ptr [[TMP14]], align 1 // CHECK2-NEXT: br label [[IFCONT4:%.*]] // CHECK2: else3: // CHECK2-NEXT: br label [[IFCONT4]] @@ -1729,25 +1729,25 @@ // CHECK2-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK2-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]] // CHECK2: then6: -// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK2-NEXT: store volatile i32 [[TMP23]], ptr addrspace(3) [[TMP22]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK2-NEXT: store volatile i32 [[TMP19]], ptr addrspace(3) [[TMP18]], align 4 // CHECK2-NEXT: br label [[IFCONT8:%.*]] // CHECK2: else7: // CHECK2-NEXT: br label [[IFCONT8]] // CHECK2: ifcont8: // CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP24]] +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]] // CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]] // CHECK2: then10: -// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP26]], align 4 -// CHECK2-NEXT: [[TMP29:%.*]] = load volatile i32, ptr addrspace(3) [[TMP25]], align 4 -// CHECK2-NEXT: store i32 [[TMP29]], ptr [[TMP27]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load volatile i32, ptr addrspace(3) [[TMP21]], align 4 +// CHECK2-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4 // CHECK2-NEXT: br label [[IFCONT12:%.*]] // CHECK2: else11: // CHECK2-NEXT: br label [[IFCONT12]] @@ -1765,20 +1765,20 @@ // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP7]] -// CHECK2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 -// CHECK2-NEXT: store i8 [[TMP11]], ptr [[TMP10]], align 128 -// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4 -// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP5]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP7]] -// CHECK2-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP13]], align 4 -// CHECK2-NEXT: store float [[TMP16]], ptr [[TMP15]], align 128 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP5]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1 +// CHECK2-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 128 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP5]] +// CHECK2-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP11]], align 4 +// CHECK2-NEXT: store float [[TMP13]], ptr [[TMP12]], align 128 // CHECK2-NEXT: ret void // // @@ -1793,17 +1793,17 @@ // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 // CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP4]] +// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 // CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP12]]) #[[ATTR4]] +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP4]] +// CHECK2-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP9]]) #[[ATTR4]] // CHECK2-NEXT: ret void // // @@ -1817,20 +1817,20 @@ // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP7]] -// CHECK2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 128 -// CHECK2-NEXT: store i8 [[TMP11]], ptr [[TMP9]], align 1 -// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4 -// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP5]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP7]] -// CHECK2-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 128 -// CHECK2-NEXT: store float [[TMP16]], ptr [[TMP13]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP5]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 128 +// CHECK2-NEXT: store i8 [[TMP9]], ptr [[TMP7]], align 1 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP5]] +// CHECK2-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 128 +// CHECK2-NEXT: store float [[TMP13]], ptr [[TMP11]], align 4 // CHECK2-NEXT: ret void // // @@ -1845,17 +1845,17 @@ // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 // CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP4]] +// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 // CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[TMP12]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP4]] +// CHECK2-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[TMP9]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] // CHECK2-NEXT: ret void // // @@ -1868,15 +1868,15 @@ // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK2-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true) +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]]) #[[ATTR4]] -// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: // CHECK2-NEXT: ret void @@ -1903,40 +1903,40 @@ // CHECK2-NEXT: store i16 -32768, ptr [[B2]], align 2 // CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[A1]], ptr [[TMP2]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK2-NEXT: store ptr [[B2]], ptr [[TMP4]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[A1]], ptr [[TMP9]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK2-NEXT: store ptr [[B2]], ptr [[TMP11]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[TMP14]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func15, ptr @_omp_reduction_inter_warp_copy_func16, ptr @_omp_reduction_list_to_global_copy_func17, ptr @_omp_reduction_list_to_global_reduce_func18, ptr @_omp_reduction_global_to_list_copy_func19, ptr @_omp_reduction_global_to_list_reduce_func20) -// CHECK2-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 1 -// CHECK2-NEXT: br i1 [[TMP16]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[B2]], ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[A1]], ptr [[TMP6]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[B2]], ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP8]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func15, ptr @_omp_reduction_inter_warp_copy_func16, ptr @_omp_reduction_list_to_global_copy_func17, ptr @_omp_reduction_list_to_global_reduce_func18, ptr @_omp_reduction_global_to_list_copy_func19, ptr @_omp_reduction_global_to_list_reduce_func20) +// CHECK2-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 1 +// CHECK2-NEXT: br i1 [[TMP10]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK2: .omp.reduction.then: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK2-NEXT: [[OR:%.*]] = or i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[A1]], align 4 +// CHECK2-NEXT: [[OR:%.*]] = or i32 [[TMP11]], [[TMP12]] // CHECK2-NEXT: store i32 [[OR]], ptr [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP19]] to i32 -// CHECK2-NEXT: [[TMP20:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK2-NEXT: [[CONV3:%.*]] = sext i16 [[TMP20]] to i32 +// CHECK2-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP13]] to i32 +// CHECK2-NEXT: [[TMP14:%.*]] = load i16, ptr [[B2]], align 2 +// CHECK2-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 // CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV3]] // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP21:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK2-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP1]], align 2 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP22:%.*]] = load i16, ptr [[B2]], align 2 +// CHECK2-NEXT: [[TMP16:%.*]] = load i16, ptr [[B2]], align 2 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i16 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i16 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK2-NEXT: store i16 [[COND]], ptr [[TMP1]], align 2 -// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP7]]) +// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK2: .omp.reduction.done: // CHECK2-NEXT: ret void @@ -1981,30 +1981,30 @@ // CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 // CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[A1]], ptr [[TMP7]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK2-NEXT: store ptr [[B2]], ptr [[TMP9]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB3]], i32 [[TMP6]], i32 2, i32 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func12, ptr @_omp_reduction_inter_warp_copy_func13) -// CHECK2-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 -// CHECK2-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[B2]], ptr [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP6]], i32 2, i32 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func12, ptr @_omp_reduction_inter_warp_copy_func13) +// CHECK2-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 1 +// CHECK2-NEXT: br i1 [[TMP10]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK2: .omp.reduction.then: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK2-NEXT: [[OR5:%.*]] = or i32 [[TMP14]], [[TMP15]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[A1]], align 4 +// CHECK2-NEXT: [[OR5:%.*]] = or i32 [[TMP11]], [[TMP12]] // CHECK2-NEXT: store i32 [[OR5]], ptr [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK2-NEXT: [[CONV6:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK2-NEXT: [[TMP17:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK2-NEXT: [[CONV7:%.*]] = sext i16 [[TMP17]] to i32 +// CHECK2-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK2-NEXT: [[CONV6:%.*]] = sext i16 [[TMP13]] to i32 +// CHECK2-NEXT: [[TMP14:%.*]] = load i16, ptr [[B2]], align 2 +// CHECK2-NEXT: [[CONV7:%.*]] = sext i16 [[TMP14]] to i32 // CHECK2-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]] // CHECK2-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] // CHECK2: cond.true9: -// CHECK2-NEXT: [[TMP18:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK2-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP1]], align 2 // CHECK2-NEXT: br label [[COND_END11:%.*]] // CHECK2: cond.false10: -// CHECK2-NEXT: [[TMP19:%.*]] = load i16, ptr [[B2]], align 2 +// CHECK2-NEXT: [[TMP16:%.*]] = load i16, ptr [[B2]], align 2 // CHECK2-NEXT: br label [[COND_END11]] // CHECK2: cond.end11: -// CHECK2-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP18]], [[COND_TRUE9]] ], [ [[TMP19]], [[COND_FALSE10]] ] +// CHECK2-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP15]], [[COND_TRUE9]] ], [ [[TMP16]], [[COND_FALSE10]] ] // CHECK2-NEXT: store i16 [[COND12]], ptr [[TMP1]], align 2 // CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP6]]) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] @@ -2027,71 +2027,71 @@ // CHECK2-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 // CHECK2-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 // CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2 -// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2 -// CHECK2-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i32 1 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK2-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16 -// CHECK2-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP17]]) -// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP11]], i32 1 -// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 -// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP12]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP22]], align 4 -// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr i16, ptr [[TMP24]], i32 1 -// CHECK2-NEXT: [[TMP28:%.*]] = load i16, ptr [[TMP24]], align 2 -// CHECK2-NEXT: [[TMP29:%.*]] = sext i16 [[TMP28]] to i32 -// CHECK2-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK2-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 -// CHECK2-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]]) -// CHECK2-NEXT: [[TMP33:%.*]] = trunc i32 [[TMP32]] to i16 -// CHECK2-NEXT: store i16 [[TMP33]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2 -// CHECK2-NEXT: [[TMP34:%.*]] = getelementptr i16, ptr [[TMP24]], i32 1 -// CHECK2-NEXT: [[TMP35:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 -// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP25]], align 4 -// CHECK2-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK2-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK2-NEXT: [[TMP39:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK2-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] -// CHECK2-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK2-NEXT: [[TMP42:%.*]] = and i16 [[TMP6]], 1 -// CHECK2-NEXT: [[TMP43:%.*]] = icmp eq i16 [[TMP42]], 0 -// CHECK2-NEXT: [[TMP44:%.*]] = and i1 [[TMP41]], [[TMP43]] -// CHECK2-NEXT: [[TMP45:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK2-NEXT: [[TMP46:%.*]] = and i1 [[TMP44]], [[TMP45]] -// CHECK2-NEXT: [[TMP47:%.*]] = or i1 [[TMP37]], [[TMP40]] -// CHECK2-NEXT: [[TMP48:%.*]] = or i1 [[TMP47]], [[TMP46]] -// CHECK2-NEXT: br i1 [[TMP48]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK2-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2 +// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK2-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16 +// CHECK2-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]]) +// CHECK2-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1 +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1 +// CHECK2-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP19]], align 2 +// CHECK2-NEXT: [[TMP23:%.*]] = sext i16 [[TMP22]] to i32 +// CHECK2-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK2-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16 +// CHECK2-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP23]], i16 [[TMP6]], i16 [[TMP25]]) +// CHECK2-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16 +// CHECK2-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2 +// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1 +// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 +// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP20]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0 +// CHECK2-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK2-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] +// CHECK2-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP7]], 2 +// CHECK2-NEXT: [[TMP35:%.*]] = and i16 [[TMP5]], 1 +// CHECK2-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP35]], 0 +// CHECK2-NEXT: [[TMP37:%.*]] = and i1 [[TMP34]], [[TMP36]] +// CHECK2-NEXT: [[TMP38:%.*]] = icmp sgt i16 [[TMP6]], 0 +// CHECK2-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] +// CHECK2-NEXT: [[TMP40:%.*]] = or i1 [[TMP30]], [[TMP33]] +// CHECK2-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]] +// CHECK2-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK2: then: // CHECK2-NEXT: call void @"_omp$reduction$reduction_func11"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]] // CHECK2-NEXT: br label [[IFCONT:%.*]] // CHECK2: else: // CHECK2-NEXT: br label [[IFCONT]] // CHECK2: ifcont: -// CHECK2-NEXT: [[TMP51:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK2-NEXT: [[TMP52:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK2-NEXT: [[TMP53:%.*]] = and i1 [[TMP51]], [[TMP52]] -// CHECK2-NEXT: br i1 [[TMP53]], label [[THEN5:%.*]], label [[ELSE6:%.*]] +// CHECK2-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK2-NEXT: [[TMP43:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] +// CHECK2-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] +// CHECK2-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]] // CHECK2: then5: -// CHECK2-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP56:%.*]] = load ptr, ptr [[TMP54]], align 4 -// CHECK2-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP59:%.*]] = load ptr, ptr [[TMP57]], align 4 -// CHECK2-NEXT: [[TMP60:%.*]] = load i32, ptr [[TMP56]], align 4 -// CHECK2-NEXT: store i32 [[TMP60]], ptr [[TMP59]], align 4 -// CHECK2-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP61]], align 4 -// CHECK2-NEXT: [[TMP64:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP66:%.*]] = load ptr, ptr [[TMP64]], align 4 -// CHECK2-NEXT: [[TMP67:%.*]] = load i16, ptr [[TMP63]], align 2 -// CHECK2-NEXT: store i16 [[TMP67]], ptr [[TMP66]], align 2 +// CHECK2-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 4 +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK2-NEXT: store i32 [[TMP49]], ptr [[TMP48]], align 4 +// CHECK2-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 4 +// CHECK2-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 4 +// CHECK2-NEXT: [[TMP54:%.*]] = load i16, ptr [[TMP51]], align 2 +// CHECK2-NEXT: store i16 [[TMP54]], ptr [[TMP53]], align 2 // CHECK2-NEXT: br label [[IFCONT7:%.*]] // CHECK2: else6: // CHECK2-NEXT: br label [[IFCONT7]] @@ -2104,7 +2104,7 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() @@ -2113,56 +2113,56 @@ // CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK2-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 // CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP2]]) +// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK2-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK2-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK2: then: -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK2-NEXT: store volatile i32 [[TMP12]], ptr addrspace(3) [[TMP11]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK2-NEXT: store volatile i32 [[TMP10]], ptr addrspace(3) [[TMP9]], align 4 // CHECK2-NEXT: br label [[IFCONT:%.*]] // CHECK2: else: // CHECK2-NEXT: br label [[IFCONT]] // CHECK2: ifcont: -// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]] +// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]] // CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK2: then2: -// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load volatile i32, ptr addrspace(3) [[TMP14]], align 4 -// CHECK2-NEXT: store i32 [[TMP18]], ptr [[TMP16]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load volatile i32, ptr addrspace(3) [[TMP12]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 // CHECK2-NEXT: br label [[IFCONT4:%.*]] // CHECK2: else3: // CHECK2-NEXT: br label [[IFCONT4]] // CHECK2: ifcont4: -// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]]) +// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK2-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK2-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]] // CHECK2: then6: -// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK2-NEXT: [[TMP24:%.*]] = load i16, ptr [[TMP20]], align 2 -// CHECK2-NEXT: store volatile i16 [[TMP24]], ptr addrspace(3) [[TMP22]], align 2 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP17]], align 2 +// CHECK2-NEXT: store volatile i16 [[TMP19]], ptr addrspace(3) [[TMP18]], align 2 // CHECK2-NEXT: br label [[IFCONT8:%.*]] // CHECK2: else7: // CHECK2-NEXT: br label [[IFCONT8]] // CHECK2: ifcont8: -// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP25]] +// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]] // CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]] // CHECK2: then10: -// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP28]], align 4 -// CHECK2-NEXT: [[TMP31:%.*]] = load volatile i16, ptr addrspace(3) [[TMP26]], align 2 -// CHECK2-NEXT: store i16 [[TMP31]], ptr [[TMP29]], align 2 +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load volatile i16, ptr addrspace(3) [[TMP21]], align 2 +// CHECK2-NEXT: store i16 [[TMP24]], ptr [[TMP23]], align 2 // CHECK2-NEXT: br label [[IFCONT12:%.*]] // CHECK2: else11: // CHECK2-NEXT: br label [[IFCONT12]] @@ -2185,71 +2185,71 @@ // CHECK2-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 // CHECK2-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 // CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2 -// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2 -// CHECK2-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i32 1 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK2-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16 -// CHECK2-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP17]]) -// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP11]], i32 1 -// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 -// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP12]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP22]], align 4 -// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr i16, ptr [[TMP24]], i32 1 -// CHECK2-NEXT: [[TMP28:%.*]] = load i16, ptr [[TMP24]], align 2 -// CHECK2-NEXT: [[TMP29:%.*]] = sext i16 [[TMP28]] to i32 -// CHECK2-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK2-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 -// CHECK2-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]]) -// CHECK2-NEXT: [[TMP33:%.*]] = trunc i32 [[TMP32]] to i16 -// CHECK2-NEXT: store i16 [[TMP33]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2 -// CHECK2-NEXT: [[TMP34:%.*]] = getelementptr i16, ptr [[TMP24]], i32 1 -// CHECK2-NEXT: [[TMP35:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 -// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP25]], align 4 -// CHECK2-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK2-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK2-NEXT: [[TMP39:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK2-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] -// CHECK2-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK2-NEXT: [[TMP42:%.*]] = and i16 [[TMP6]], 1 -// CHECK2-NEXT: [[TMP43:%.*]] = icmp eq i16 [[TMP42]], 0 -// CHECK2-NEXT: [[TMP44:%.*]] = and i1 [[TMP41]], [[TMP43]] -// CHECK2-NEXT: [[TMP45:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK2-NEXT: [[TMP46:%.*]] = and i1 [[TMP44]], [[TMP45]] -// CHECK2-NEXT: [[TMP47:%.*]] = or i1 [[TMP37]], [[TMP40]] -// CHECK2-NEXT: [[TMP48:%.*]] = or i1 [[TMP47]], [[TMP46]] -// CHECK2-NEXT: br i1 [[TMP48]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK2-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2 +// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK2-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16 +// CHECK2-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]]) +// CHECK2-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1 +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1 +// CHECK2-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP19]], align 2 +// CHECK2-NEXT: [[TMP23:%.*]] = sext i16 [[TMP22]] to i32 +// CHECK2-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK2-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16 +// CHECK2-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP23]], i16 [[TMP6]], i16 [[TMP25]]) +// CHECK2-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16 +// CHECK2-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2 +// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1 +// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 +// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP20]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0 +// CHECK2-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK2-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] +// CHECK2-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP7]], 2 +// CHECK2-NEXT: [[TMP35:%.*]] = and i16 [[TMP5]], 1 +// CHECK2-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP35]], 0 +// CHECK2-NEXT: [[TMP37:%.*]] = and i1 [[TMP34]], [[TMP36]] +// CHECK2-NEXT: [[TMP38:%.*]] = icmp sgt i16 [[TMP6]], 0 +// CHECK2-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] +// CHECK2-NEXT: [[TMP40:%.*]] = or i1 [[TMP30]], [[TMP33]] +// CHECK2-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]] +// CHECK2-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK2: then: // CHECK2-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]] // CHECK2-NEXT: br label [[IFCONT:%.*]] // CHECK2: else: // CHECK2-NEXT: br label [[IFCONT]] // CHECK2: ifcont: -// CHECK2-NEXT: [[TMP51:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK2-NEXT: [[TMP52:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK2-NEXT: [[TMP53:%.*]] = and i1 [[TMP51]], [[TMP52]] -// CHECK2-NEXT: br i1 [[TMP53]], label [[THEN5:%.*]], label [[ELSE6:%.*]] +// CHECK2-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK2-NEXT: [[TMP43:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] +// CHECK2-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] +// CHECK2-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]] // CHECK2: then5: -// CHECK2-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP56:%.*]] = load ptr, ptr [[TMP54]], align 4 -// CHECK2-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP59:%.*]] = load ptr, ptr [[TMP57]], align 4 -// CHECK2-NEXT: [[TMP60:%.*]] = load i32, ptr [[TMP56]], align 4 -// CHECK2-NEXT: store i32 [[TMP60]], ptr [[TMP59]], align 4 -// CHECK2-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP61]], align 4 -// CHECK2-NEXT: [[TMP64:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP66:%.*]] = load ptr, ptr [[TMP64]], align 4 -// CHECK2-NEXT: [[TMP67:%.*]] = load i16, ptr [[TMP63]], align 2 -// CHECK2-NEXT: store i16 [[TMP67]], ptr [[TMP66]], align 2 +// CHECK2-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 4 +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK2-NEXT: store i32 [[TMP49]], ptr [[TMP48]], align 4 +// CHECK2-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 4 +// CHECK2-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 4 +// CHECK2-NEXT: [[TMP54:%.*]] = load i16, ptr [[TMP51]], align 2 +// CHECK2-NEXT: store i16 [[TMP54]], ptr [[TMP53]], align 2 // CHECK2-NEXT: br label [[IFCONT7:%.*]] // CHECK2: else6: // CHECK2-NEXT: br label [[IFCONT7]] @@ -2262,7 +2262,7 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() @@ -2271,56 +2271,56 @@ // CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK2-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 // CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]]) +// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK2-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK2-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK2: then: -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK2-NEXT: store volatile i32 [[TMP12]], ptr addrspace(3) [[TMP11]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK2-NEXT: store volatile i32 [[TMP10]], ptr addrspace(3) [[TMP9]], align 4 // CHECK2-NEXT: br label [[IFCONT:%.*]] // CHECK2: else: // CHECK2-NEXT: br label [[IFCONT]] // CHECK2: ifcont: -// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]] +// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]] // CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK2: then2: -// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load volatile i32, ptr addrspace(3) [[TMP14]], align 4 -// CHECK2-NEXT: store i32 [[TMP18]], ptr [[TMP16]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load volatile i32, ptr addrspace(3) [[TMP12]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 // CHECK2-NEXT: br label [[IFCONT4:%.*]] // CHECK2: else3: // CHECK2-NEXT: br label [[IFCONT4]] // CHECK2: ifcont4: -// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]]) +// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK2-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK2-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]] // CHECK2: then6: -// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK2-NEXT: [[TMP24:%.*]] = load i16, ptr [[TMP20]], align 2 -// CHECK2-NEXT: store volatile i16 [[TMP24]], ptr addrspace(3) [[TMP22]], align 2 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP17]], align 2 +// CHECK2-NEXT: store volatile i16 [[TMP19]], ptr addrspace(3) [[TMP18]], align 2 // CHECK2-NEXT: br label [[IFCONT8:%.*]] // CHECK2: else7: // CHECK2-NEXT: br label [[IFCONT8]] // CHECK2: ifcont8: -// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP25]] +// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]] // CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]] // CHECK2: then10: -// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP28]], align 4 -// CHECK2-NEXT: [[TMP31:%.*]] = load volatile i16, ptr addrspace(3) [[TMP26]], align 2 -// CHECK2-NEXT: store i16 [[TMP31]], ptr [[TMP29]], align 2 +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load volatile i16, ptr addrspace(3) [[TMP21]], align 2 +// CHECK2-NEXT: store i16 [[TMP24]], ptr [[TMP23]], align 2 // CHECK2-NEXT: br label [[IFCONT12:%.*]] // CHECK2: else11: // CHECK2-NEXT: br label [[IFCONT12]] @@ -2338,20 +2338,20 @@ // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP7]] -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK2-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 128 -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 -// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP5]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP7]] -// CHECK2-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP14]], align 2 -// CHECK2-NEXT: store i16 [[TMP17]], ptr [[TMP16]], align 128 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP5]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 128 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP5]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP11]], align 2 +// CHECK2-NEXT: store i16 [[TMP13]], ptr [[TMP12]], align 128 // CHECK2-NEXT: ret void // // @@ -2366,17 +2366,17 @@ // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP4]] +// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 // CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP13]]) #[[ATTR4]] +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP4]] +// CHECK2-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP9]]) #[[ATTR4]] // CHECK2-NEXT: ret void // // @@ -2390,20 +2390,20 @@ // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP7]] -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 128 -// CHECK2-NEXT: store i32 [[TMP12]], ptr [[TMP9]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 -// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP5]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP7]] -// CHECK2-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP16]], align 128 -// CHECK2-NEXT: store i16 [[TMP17]], ptr [[TMP14]], align 2 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP5]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 128 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP5]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 128 +// CHECK2-NEXT: store i16 [[TMP13]], ptr [[TMP11]], align 2 // CHECK2-NEXT: ret void // // @@ -2418,17 +2418,17 @@ // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP4]] +// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 // CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[TMP13]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP4]] +// CHECK2-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[TMP9]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] // CHECK2-NEXT: ret void // // @@ -2441,7 +2441,7 @@ // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: @@ -2451,7 +2451,7 @@ // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[E1]]) #[[ATTR4:[0-9]+]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -2477,14 +2477,14 @@ // CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[E1]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP3]], ptr [[TMP7]], i32 2048, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func) -// CHECK3-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 1 -// CHECK3-NEXT: br i1 [[TMP9]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP3]], ptr [[TMP5]], i32 2048, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func) +// CHECK3-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 1 +// CHECK3-NEXT: br i1 [[TMP7]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK3: .omp.reduction.then: -// CHECK3-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = load double, ptr [[E1]], align 8 -// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP0]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load double, ptr [[E1]], align 8 +// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[TMP8]], [[TMP9]] // CHECK3-NEXT: store double [[ADD2]], ptr [[TMP0]], align 8 // CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP3]]) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] @@ -2507,51 +2507,51 @@ // CHECK3-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 // CHECK3-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 // CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2 -// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2 -// CHECK3-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP11]], i32 1 -// CHECK3-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP11]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK3-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16 -// CHECK3-NEXT: [[TMP20:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP19]]) -// CHECK3-NEXT: store i64 [[TMP20]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 8 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr i64, ptr [[TMP11]], i32 1 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 -// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK3-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK3-NEXT: [[TMP26:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2 +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP9]], i32 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP9]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK3-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16 +// CHECK3-NEXT: [[TMP15:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]]) +// CHECK3-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[TMP9]], i32 1 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp eq i16 [[TMP7]], 0 +// CHECK3-NEXT: [[TMP19:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP21:%.*]] = and i1 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP22:%.*]] = icmp eq i16 [[TMP7]], 2 +// CHECK3-NEXT: [[TMP23:%.*]] = and i16 [[TMP5]], 1 +// CHECK3-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP23]], 0 +// CHECK3-NEXT: [[TMP25:%.*]] = and i1 [[TMP22]], [[TMP24]] +// CHECK3-NEXT: [[TMP26:%.*]] = icmp sgt i16 [[TMP6]], 0 // CHECK3-NEXT: [[TMP27:%.*]] = and i1 [[TMP25]], [[TMP26]] -// CHECK3-NEXT: [[TMP28:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK3-NEXT: [[TMP29:%.*]] = and i16 [[TMP6]], 1 -// CHECK3-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP29]], 0 -// CHECK3-NEXT: [[TMP31:%.*]] = and i1 [[TMP28]], [[TMP30]] -// CHECK3-NEXT: [[TMP32:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK3-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] -// CHECK3-NEXT: [[TMP34:%.*]] = or i1 [[TMP24]], [[TMP27]] -// CHECK3-NEXT: [[TMP35:%.*]] = or i1 [[TMP34]], [[TMP33]] -// CHECK3-NEXT: br i1 [[TMP35]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK3-NEXT: [[TMP28:%.*]] = or i1 [[TMP18]], [[TMP21]] +// CHECK3-NEXT: [[TMP29:%.*]] = or i1 [[TMP28]], [[TMP27]] +// CHECK3-NEXT: br i1 [[TMP29]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK3: then: // CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]] // CHECK3-NEXT: br label [[IFCONT:%.*]] // CHECK3: else: // CHECK3-NEXT: br label [[IFCONT]] // CHECK3: ifcont: -// CHECK3-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK3-NEXT: [[TMP39:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] -// CHECK3-NEXT: br i1 [[TMP40]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK3-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP31:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]] +// CHECK3-NEXT: br i1 [[TMP32]], label [[THEN4:%.*]], label [[ELSE5:%.*]] // CHECK3: then4: -// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP41]], align 4 -// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP44]], align 4 -// CHECK3-NEXT: [[TMP47:%.*]] = load double, ptr [[TMP43]], align 8 -// CHECK3-NEXT: store double [[TMP47]], ptr [[TMP46]], align 8 +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP35]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load double, ptr [[TMP34]], align 8 +// CHECK3-NEXT: store double [[TMP37]], ptr [[TMP36]], align 8 // CHECK3-NEXT: br label [[IFCONT6:%.*]] // CHECK3: else5: // CHECK3-NEXT: br label [[IFCONT6]] @@ -2577,41 +2577,41 @@ // CHECK3-NEXT: store i32 0, ptr [[DOTCNT_ADDR]], align 4 // CHECK3-NEXT: br label [[PRECOND:%.*]] // CHECK3: precond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 2 -// CHECK3-NEXT: br i1 [[TMP9]], label [[BODY:%.*]], label [[EXIT:%.*]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 2 +// CHECK3-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]] // CHECK3: body: // CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK3-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK3-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK3: then: -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i32 [[TMP8]] -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK3-NEXT: store volatile i32 [[TMP15]], ptr addrspace(3) [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 [[TMP7]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store volatile i32 [[TMP13]], ptr addrspace(3) [[TMP12]], align 4 // CHECK3-NEXT: br label [[IFCONT:%.*]] // CHECK3: else: // CHECK3-NEXT: br label [[IFCONT]] // CHECK3: ifcont: // CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP16]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]] // CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK3: then2: -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP19]], i32 [[TMP8]] -// CHECK3-NEXT: [[TMP22:%.*]] = load volatile i32, ptr addrspace(3) [[TMP17]], align 4 -// CHECK3-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]] +// CHECK3-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 // CHECK3-NEXT: br label [[IFCONT4:%.*]] // CHECK3: else3: // CHECK3-NEXT: br label [[IFCONT4]] // CHECK3: ifcont4: -// CHECK3-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTCNT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR]], align 4 // CHECK3-NEXT: br label [[PRECOND]] // CHECK3: exit: // CHECK3-NEXT: ret void @@ -2627,14 +2627,14 @@ // CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2048 x double], ptr [[E]], i32 0, i32 [[TMP7]] -// CHECK3-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP9]], align 8 -// CHECK3-NEXT: store double [[TMP12]], ptr [[TMP11]], align 128 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x double], ptr [[E]], i32 0, i32 [[TMP5]] +// CHECK3-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP7]], align 8 +// CHECK3-NEXT: store double [[TMP9]], ptr [[TMP8]], align 128 // CHECK3-NEXT: ret void // // @@ -2649,13 +2649,13 @@ // CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 // CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x double], ptr [[E]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP10]]) #[[ATTR4]] +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2048 x double], ptr [[E]], i32 0, i32 [[TMP4]] +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP7]]) #[[ATTR4]] // CHECK3-NEXT: ret void // // @@ -2669,14 +2669,14 @@ // CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2048 x double], ptr [[E]], i32 0, i32 [[TMP7]] -// CHECK3-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP11]], align 128 -// CHECK3-NEXT: store double [[TMP12]], ptr [[TMP9]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x double], ptr [[E]], i32 0, i32 [[TMP5]] +// CHECK3-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP8]], align 128 +// CHECK3-NEXT: store double [[TMP9]], ptr [[TMP7]], align 8 // CHECK3-NEXT: ret void // // @@ -2691,13 +2691,13 @@ // CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 // CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x double], ptr [[E]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[TMP10]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2048 x double], ptr [[E]], i32 0, i32 [[TMP4]] +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[TMP7]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] // CHECK3-NEXT: ret void // // @@ -2710,23 +2710,23 @@ // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4 // CHECK3-NEXT: store i32 [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP1:%.*]] = load i8, ptr [[C_ADDR]], align 1 -// CHECK3-NEXT: [[C2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 1) -// CHECK3-NEXT: store i8 [[TMP1]], ptr [[C2]], align 1 +// CHECK3-NEXT: [[C1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 1) +// CHECK3-NEXT: store i8 [[TMP1]], ptr [[C1]], align 1 // CHECK3-NEXT: [[TMP2:%.*]] = load float, ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[D3:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) -// CHECK3-NEXT: store float [[TMP2]], ptr [[D3]], align 4 +// CHECK3-NEXT: [[D2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) +// CHECK3-NEXT: store float [[TMP2]], ptr [[D2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C2]], ptr [[D3]]) #[[ATTR4]] -// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[D3]], i32 4) -// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[C2]], i32 1) -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK3-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C1]], ptr [[D2]]) #[[ATTR4]] +// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i32 4) +// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i32 1) +// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -2764,21 +2764,21 @@ // CHECK3-NEXT: store ptr [[C1]], ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 // CHECK3-NEXT: store ptr [[D2]], ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP10]], i32 2048, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func3, ptr @_omp_reduction_inter_warp_copy_func4, ptr @_omp_reduction_list_to_global_copy_func5, ptr @_omp_reduction_list_to_global_reduce_func6, ptr @_omp_reduction_global_to_list_copy_func7, ptr @_omp_reduction_global_to_list_reduce_func8) -// CHECK3-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP11]], 1 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP8]], i32 2048, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func3, ptr @_omp_reduction_inter_warp_copy_func4, ptr @_omp_reduction_list_to_global_copy_func5, ptr @_omp_reduction_list_to_global_reduce_func6, ptr @_omp_reduction_global_to_list_copy_func7, ptr @_omp_reduction_global_to_list_reduce_func8) +// CHECK3-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 1 +// CHECK3-NEXT: br i1 [[TMP10]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK3: .omp.reduction.then: -// CHECK3-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK3-NEXT: [[CONV4:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK3-NEXT: [[TMP14:%.*]] = load i8, ptr [[C1]], align 1 -// CHECK3-NEXT: [[CONV5:%.*]] = sext i8 [[TMP14]] to i32 +// CHECK3-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP0]], align 1 +// CHECK3-NEXT: [[CONV4:%.*]] = sext i8 [[TMP11]] to i32 +// CHECK3-NEXT: [[TMP12:%.*]] = load i8, ptr [[C1]], align 1 +// CHECK3-NEXT: [[CONV5:%.*]] = sext i8 [[TMP12]] to i32 // CHECK3-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]] // CHECK3-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8 // CHECK3-NEXT: store i8 [[CONV7]], ptr [[TMP0]], align 1 -// CHECK3-NEXT: [[TMP15:%.*]] = load float, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load float, ptr [[D2]], align 4 -// CHECK3-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load float, ptr [[D2]], align 4 +// CHECK3-NEXT: [[MUL8:%.*]] = fmul float [[TMP13]], [[TMP14]] // CHECK3-NEXT: store float [[MUL8]], ptr [[TMP1]], align 4 // CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] @@ -2803,71 +2803,71 @@ // CHECK3-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 // CHECK3-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 // CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2 -// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2 -// CHECK3-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP10]], i32 1 -// CHECK3-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP10]], align 1 -// CHECK3-NEXT: [[TMP14:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK3-NEXT: [[TMP16:%.*]] = trunc i32 [[TMP15]] to i16 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP14]], i16 [[TMP7]], i16 [[TMP16]]) -// CHECK3-NEXT: [[TMP18:%.*]] = trunc i32 [[TMP17]] to i8 -// CHECK3-NEXT: store i8 [[TMP18]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 1 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP10]], i32 1 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 -// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP21]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr float, ptr [[TMP23]], i32 1 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK3-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 -// CHECK3-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]]) -// CHECK3-NEXT: store i32 [[TMP32]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr i32, ptr [[TMP23]], i32 1 -// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 -// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP24]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK3-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK3-NEXT: [[TMP38:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2 +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP9]], i32 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP9]], align 1 +// CHECK3-NEXT: [[TMP13:%.*]] = sext i8 [[TMP12]] to i32 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK3-NEXT: [[TMP15:%.*]] = trunc i32 [[TMP14]] to i16 +// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP13]], i16 [[TMP6]], i16 [[TMP15]]) +// CHECK3-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8 +// CHECK3-NEXT: store i8 [[TMP17]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 1 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP9]], i32 1 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr float, ptr [[TMP21]], i32 1 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK3-NEXT: [[TMP26:%.*]] = trunc i32 [[TMP25]] to i16 +// CHECK3-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP24]], i16 [[TMP6]], i16 [[TMP26]]) +// CHECK3-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP21]], i32 1 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 +// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0 +// CHECK3-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] +// CHECK3-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP7]], 2 +// CHECK3-NEXT: [[TMP35:%.*]] = and i16 [[TMP5]], 1 +// CHECK3-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP35]], 0 +// CHECK3-NEXT: [[TMP37:%.*]] = and i1 [[TMP34]], [[TMP36]] +// CHECK3-NEXT: [[TMP38:%.*]] = icmp sgt i16 [[TMP6]], 0 // CHECK3-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] -// CHECK3-NEXT: [[TMP40:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK3-NEXT: [[TMP41:%.*]] = and i16 [[TMP6]], 1 -// CHECK3-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP41]], 0 -// CHECK3-NEXT: [[TMP43:%.*]] = and i1 [[TMP40]], [[TMP42]] -// CHECK3-NEXT: [[TMP44:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK3-NEXT: [[TMP45:%.*]] = and i1 [[TMP43]], [[TMP44]] -// CHECK3-NEXT: [[TMP46:%.*]] = or i1 [[TMP36]], [[TMP39]] -// CHECK3-NEXT: [[TMP47:%.*]] = or i1 [[TMP46]], [[TMP45]] -// CHECK3-NEXT: br i1 [[TMP47]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK3-NEXT: [[TMP40:%.*]] = or i1 [[TMP30]], [[TMP33]] +// CHECK3-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]] +// CHECK3-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK3: then: // CHECK3-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]] // CHECK3-NEXT: br label [[IFCONT:%.*]] // CHECK3: else: // CHECK3-NEXT: br label [[IFCONT]] // CHECK3: ifcont: -// CHECK3-NEXT: [[TMP50:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK3-NEXT: [[TMP51:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: [[TMP52:%.*]] = and i1 [[TMP50]], [[TMP51]] -// CHECK3-NEXT: br i1 [[TMP52]], label [[THEN5:%.*]], label [[ELSE6:%.*]] +// CHECK3-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP43:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] +// CHECK3-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]] // CHECK3: then5: -// CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP54:%.*]] = load ptr, ptr [[TMP53]], align 4 -// CHECK3-NEXT: [[TMP55:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP56:%.*]] = load ptr, ptr [[TMP55]], align 4 -// CHECK3-NEXT: [[TMP57:%.*]] = load i8, ptr [[TMP54]], align 1 -// CHECK3-NEXT: store i8 [[TMP57]], ptr [[TMP56]], align 1 -// CHECK3-NEXT: [[TMP58:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP60:%.*]] = load ptr, ptr [[TMP58]], align 4 -// CHECK3-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP61]], align 4 -// CHECK3-NEXT: [[TMP64:%.*]] = load float, ptr [[TMP60]], align 4 -// CHECK3-NEXT: store float [[TMP64]], ptr [[TMP63]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 4 +// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 4 +// CHECK3-NEXT: [[TMP49:%.*]] = load i8, ptr [[TMP46]], align 1 +// CHECK3-NEXT: store i8 [[TMP49]], ptr [[TMP48]], align 1 +// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 4 +// CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 4 +// CHECK3-NEXT: [[TMP54:%.*]] = load float, ptr [[TMP51]], align 4 +// CHECK3-NEXT: store float [[TMP54]], ptr [[TMP53]], align 4 // CHECK3-NEXT: br label [[IFCONT7:%.*]] // CHECK3: else6: // CHECK3-NEXT: br label [[IFCONT7]] @@ -2893,25 +2893,25 @@ // CHECK3-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK3-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK3: then: -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP9]], align 1 -// CHECK3-NEXT: store volatile i8 [[TMP12]], ptr addrspace(3) [[TMP10]], align 1 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +// CHECK3-NEXT: store volatile i8 [[TMP10]], ptr addrspace(3) [[TMP9]], align 1 // CHECK3-NEXT: br label [[IFCONT:%.*]] // CHECK3: else: // CHECK3-NEXT: br label [[IFCONT]] // CHECK3: ifcont: // CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]] // CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK3: then2: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load volatile i8, ptr addrspace(3) [[TMP14]], align 1 -// CHECK3-NEXT: store i8 [[TMP18]], ptr [[TMP17]], align 1 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load volatile i8, ptr addrspace(3) [[TMP12]], align 1 +// CHECK3-NEXT: store i8 [[TMP15]], ptr [[TMP14]], align 1 // CHECK3-NEXT: br label [[IFCONT4:%.*]] // CHECK3: else3: // CHECK3-NEXT: br label [[IFCONT4]] @@ -2920,25 +2920,25 @@ // CHECK3-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK3-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]] // CHECK3: then6: -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK3-NEXT: store volatile i32 [[TMP23]], ptr addrspace(3) [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK3-NEXT: store volatile i32 [[TMP19]], ptr addrspace(3) [[TMP18]], align 4 // CHECK3-NEXT: br label [[IFCONT8:%.*]] // CHECK3: else7: // CHECK3-NEXT: br label [[IFCONT8]] // CHECK3: ifcont8: // CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP24]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]] // CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]] // CHECK3: then10: -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP26]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load volatile i32, ptr addrspace(3) [[TMP25]], align 4 -// CHECK3-NEXT: store i32 [[TMP29]], ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load volatile i32, ptr addrspace(3) [[TMP21]], align 4 +// CHECK3-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4 // CHECK3-NEXT: br label [[IFCONT12:%.*]] // CHECK3: else11: // CHECK3-NEXT: br label [[IFCONT12]] @@ -2956,20 +2956,20 @@ // CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2048 x i8], ptr [[C]], i32 0, i32 [[TMP7]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 -// CHECK3-NEXT: store i8 [[TMP11]], ptr [[TMP10]], align 128 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP5]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2048 x float], ptr [[D]], i32 0, i32 [[TMP7]] -// CHECK3-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP13]], align 4 -// CHECK3-NEXT: store float [[TMP16]], ptr [[TMP15]], align 128 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x i8], ptr [[C]], i32 0, i32 [[TMP5]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1 +// CHECK3-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 128 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2048 x float], ptr [[D]], i32 0, i32 [[TMP5]] +// CHECK3-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store float [[TMP13]], ptr [[TMP12]], align 128 // CHECK3-NEXT: ret void // // @@ -2984,17 +2984,17 @@ // CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 // CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x i8], ptr [[C]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2048 x i8], ptr [[C]], i32 0, i32 [[TMP4]] +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 // CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2048 x float], ptr [[D]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP12]]) #[[ATTR4]] +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x float], ptr [[D]], i32 0, i32 [[TMP4]] +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP9]]) #[[ATTR4]] // CHECK3-NEXT: ret void // // @@ -3008,20 +3008,20 @@ // CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2048 x i8], ptr [[C]], i32 0, i32 [[TMP7]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 128 -// CHECK3-NEXT: store i8 [[TMP11]], ptr [[TMP9]], align 1 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP5]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2048 x float], ptr [[D]], i32 0, i32 [[TMP7]] -// CHECK3-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 128 -// CHECK3-NEXT: store float [[TMP16]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x i8], ptr [[C]], i32 0, i32 [[TMP5]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 128 +// CHECK3-NEXT: store i8 [[TMP9]], ptr [[TMP7]], align 1 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2048 x float], ptr [[D]], i32 0, i32 [[TMP5]] +// CHECK3-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 128 +// CHECK3-NEXT: store float [[TMP13]], ptr [[TMP11]], align 4 // CHECK3-NEXT: ret void // // @@ -3036,17 +3036,17 @@ // CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 // CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x i8], ptr [[C]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2048 x i8], ptr [[C]], i32 0, i32 [[TMP4]] +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 // CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2048 x float], ptr [[D]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[TMP12]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x float], ptr [[D]], i32 0, i32 [[TMP4]] +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[TMP9]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] // CHECK3-NEXT: ret void // // @@ -3059,15 +3059,15 @@ // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false, i1 true) +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]]) #[[ATTR4]] -// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK3-NEXT: ret void // CHECK3: worker.exit: // CHECK3-NEXT: ret void @@ -3094,40 +3094,40 @@ // CHECK3-NEXT: store i16 -32768, ptr [[B2]], align 2 // CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[A1]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[B2]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[A1]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[B2]], ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[TMP14]], i32 2048, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func15, ptr @_omp_reduction_inter_warp_copy_func16, ptr @_omp_reduction_list_to_global_copy_func17, ptr @_omp_reduction_list_to_global_reduce_func18, ptr @_omp_reduction_global_to_list_copy_func19, ptr @_omp_reduction_global_to_list_reduce_func20) -// CHECK3-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 1 -// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[B2]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A1]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[B2]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP8]], i32 2048, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func15, ptr @_omp_reduction_inter_warp_copy_func16, ptr @_omp_reduction_list_to_global_copy_func17, ptr @_omp_reduction_list_to_global_reduce_func18, ptr @_omp_reduction_global_to_list_copy_func19, ptr @_omp_reduction_global_to_list_reduce_func20) +// CHECK3-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 1 +// CHECK3-NEXT: br i1 [[TMP10]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK3: .omp.reduction.then: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK3-NEXT: [[OR:%.*]] = or i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[A1]], align 4 +// CHECK3-NEXT: [[OR:%.*]] = or i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: store i32 [[OR]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP19]] to i32 -// CHECK3-NEXT: [[TMP20:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP20]] to i32 +// CHECK3-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP13]] to i32 +// CHECK3-NEXT: [[TMP14:%.*]] = load i16, ptr [[B2]], align 2 +// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 // CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV3]] // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP21:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK3-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP1]], align 2 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP22:%.*]] = load i16, ptr [[B2]], align 2 +// CHECK3-NEXT: [[TMP16:%.*]] = load i16, ptr [[B2]], align 2 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i16 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i16 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK3-NEXT: store i16 [[COND]], ptr [[TMP1]], align 2 -// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP7]]) +// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK3: .omp.reduction.done: // CHECK3-NEXT: ret void @@ -3172,30 +3172,30 @@ // CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[A1]], ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[B2]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB3]], i32 [[TMP6]], i32 2, i32 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func12, ptr @_omp_reduction_inter_warp_copy_func13) -// CHECK3-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[B2]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP6]], i32 2, i32 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func12, ptr @_omp_reduction_inter_warp_copy_func13) +// CHECK3-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 1 +// CHECK3-NEXT: br i1 [[TMP10]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK3: .omp.reduction.then: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK3-NEXT: [[OR5:%.*]] = or i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[A1]], align 4 +// CHECK3-NEXT: [[OR5:%.*]] = or i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: store i32 [[OR5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK3-NEXT: [[CONV6:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK3-NEXT: [[TMP17:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK3-NEXT: [[CONV7:%.*]] = sext i16 [[TMP17]] to i32 +// CHECK3-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK3-NEXT: [[CONV6:%.*]] = sext i16 [[TMP13]] to i32 +// CHECK3-NEXT: [[TMP14:%.*]] = load i16, ptr [[B2]], align 2 +// CHECK3-NEXT: [[CONV7:%.*]] = sext i16 [[TMP14]] to i32 // CHECK3-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]] // CHECK3-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] // CHECK3: cond.true9: -// CHECK3-NEXT: [[TMP18:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK3-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP1]], align 2 // CHECK3-NEXT: br label [[COND_END11:%.*]] // CHECK3: cond.false10: -// CHECK3-NEXT: [[TMP19:%.*]] = load i16, ptr [[B2]], align 2 +// CHECK3-NEXT: [[TMP16:%.*]] = load i16, ptr [[B2]], align 2 // CHECK3-NEXT: br label [[COND_END11]] // CHECK3: cond.end11: -// CHECK3-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP18]], [[COND_TRUE9]] ], [ [[TMP19]], [[COND_FALSE10]] ] +// CHECK3-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP15]], [[COND_TRUE9]] ], [ [[TMP16]], [[COND_FALSE10]] ] // CHECK3-NEXT: store i16 [[COND12]], ptr [[TMP1]], align 2 // CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP6]]) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] @@ -3218,71 +3218,71 @@ // CHECK3-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 // CHECK3-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 // CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2 -// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2 -// CHECK3-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i32 1 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK3-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16 -// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP17]]) -// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP11]], i32 1 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 -// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP22]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr i16, ptr [[TMP24]], i32 1 -// CHECK3-NEXT: [[TMP28:%.*]] = load i16, ptr [[TMP24]], align 2 -// CHECK3-NEXT: [[TMP29:%.*]] = sext i16 [[TMP28]] to i32 -// CHECK3-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK3-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 -// CHECK3-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]]) -// CHECK3-NEXT: [[TMP33:%.*]] = trunc i32 [[TMP32]] to i16 -// CHECK3-NEXT: store i16 [[TMP33]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2 -// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr i16, ptr [[TMP24]], i32 1 -// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 -// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP25]], align 4 -// CHECK3-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK3-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK3-NEXT: [[TMP39:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] -// CHECK3-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK3-NEXT: [[TMP42:%.*]] = and i16 [[TMP6]], 1 -// CHECK3-NEXT: [[TMP43:%.*]] = icmp eq i16 [[TMP42]], 0 -// CHECK3-NEXT: [[TMP44:%.*]] = and i1 [[TMP41]], [[TMP43]] -// CHECK3-NEXT: [[TMP45:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK3-NEXT: [[TMP46:%.*]] = and i1 [[TMP44]], [[TMP45]] -// CHECK3-NEXT: [[TMP47:%.*]] = or i1 [[TMP37]], [[TMP40]] -// CHECK3-NEXT: [[TMP48:%.*]] = or i1 [[TMP47]], [[TMP46]] -// CHECK3-NEXT: br i1 [[TMP48]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2 +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK3-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16 +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]]) +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP19]], align 2 +// CHECK3-NEXT: [[TMP23:%.*]] = sext i16 [[TMP22]] to i32 +// CHECK3-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK3-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16 +// CHECK3-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP23]], i16 [[TMP6]], i16 [[TMP25]]) +// CHECK3-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16 +// CHECK3-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 +// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0 +// CHECK3-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] +// CHECK3-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP7]], 2 +// CHECK3-NEXT: [[TMP35:%.*]] = and i16 [[TMP5]], 1 +// CHECK3-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP35]], 0 +// CHECK3-NEXT: [[TMP37:%.*]] = and i1 [[TMP34]], [[TMP36]] +// CHECK3-NEXT: [[TMP38:%.*]] = icmp sgt i16 [[TMP6]], 0 +// CHECK3-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] +// CHECK3-NEXT: [[TMP40:%.*]] = or i1 [[TMP30]], [[TMP33]] +// CHECK3-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]] +// CHECK3-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK3: then: // CHECK3-NEXT: call void @"_omp$reduction$reduction_func11"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]] // CHECK3-NEXT: br label [[IFCONT:%.*]] // CHECK3: else: // CHECK3-NEXT: br label [[IFCONT]] // CHECK3: ifcont: -// CHECK3-NEXT: [[TMP51:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK3-NEXT: [[TMP52:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: [[TMP53:%.*]] = and i1 [[TMP51]], [[TMP52]] -// CHECK3-NEXT: br i1 [[TMP53]], label [[THEN5:%.*]], label [[ELSE6:%.*]] +// CHECK3-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP43:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] +// CHECK3-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]] // CHECK3: then5: -// CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP56:%.*]] = load ptr, ptr [[TMP54]], align 4 -// CHECK3-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP59:%.*]] = load ptr, ptr [[TMP57]], align 4 -// CHECK3-NEXT: [[TMP60:%.*]] = load i32, ptr [[TMP56]], align 4 -// CHECK3-NEXT: store i32 [[TMP60]], ptr [[TMP59]], align 4 -// CHECK3-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP61]], align 4 -// CHECK3-NEXT: [[TMP64:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP66:%.*]] = load ptr, ptr [[TMP64]], align 4 -// CHECK3-NEXT: [[TMP67:%.*]] = load i16, ptr [[TMP63]], align 2 -// CHECK3-NEXT: store i16 [[TMP67]], ptr [[TMP66]], align 2 +// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 4 +// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 4 +// CHECK3-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK3-NEXT: store i32 [[TMP49]], ptr [[TMP48]], align 4 +// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 4 +// CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 4 +// CHECK3-NEXT: [[TMP54:%.*]] = load i16, ptr [[TMP51]], align 2 +// CHECK3-NEXT: store i16 [[TMP54]], ptr [[TMP53]], align 2 // CHECK3-NEXT: br label [[IFCONT7:%.*]] // CHECK3: else6: // CHECK3-NEXT: br label [[IFCONT7]] @@ -3295,7 +3295,7 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) +// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() @@ -3304,56 +3304,56 @@ // CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK3-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 // CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK3-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK3-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK3: then: -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK3-NEXT: store volatile i32 [[TMP12]], ptr addrspace(3) [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store volatile i32 [[TMP10]], ptr addrspace(3) [[TMP9]], align 4 // CHECK3-NEXT: br label [[IFCONT:%.*]] // CHECK3: else: // CHECK3-NEXT: br label [[IFCONT]] // CHECK3: ifcont: -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]] +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]] // CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK3: then2: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load volatile i32, ptr addrspace(3) [[TMP14]], align 4 -// CHECK3-NEXT: store i32 [[TMP18]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load volatile i32, ptr addrspace(3) [[TMP12]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 // CHECK3-NEXT: br label [[IFCONT4:%.*]] // CHECK3: else3: // CHECK3-NEXT: br label [[IFCONT4]] // CHECK3: ifcont4: -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK3-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK3-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]] // CHECK3: then6: -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK3-NEXT: [[TMP24:%.*]] = load i16, ptr [[TMP20]], align 2 -// CHECK3-NEXT: store volatile i16 [[TMP24]], ptr addrspace(3) [[TMP22]], align 2 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP17]], align 2 +// CHECK3-NEXT: store volatile i16 [[TMP19]], ptr addrspace(3) [[TMP18]], align 2 // CHECK3-NEXT: br label [[IFCONT8:%.*]] // CHECK3: else7: // CHECK3-NEXT: br label [[IFCONT8]] // CHECK3: ifcont8: -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP25]] +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]] // CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]] // CHECK3: then10: -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load volatile i16, ptr addrspace(3) [[TMP26]], align 2 -// CHECK3-NEXT: store i16 [[TMP31]], ptr [[TMP29]], align 2 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load volatile i16, ptr addrspace(3) [[TMP21]], align 2 +// CHECK3-NEXT: store i16 [[TMP24]], ptr [[TMP23]], align 2 // CHECK3-NEXT: br label [[IFCONT12:%.*]] // CHECK3: else11: // CHECK3-NEXT: br label [[IFCONT12]] @@ -3376,71 +3376,71 @@ // CHECK3-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 // CHECK3-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 // CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2 -// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2 -// CHECK3-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i32 1 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK3-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16 -// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP17]]) -// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP11]], i32 1 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 -// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP22]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr i16, ptr [[TMP24]], i32 1 -// CHECK3-NEXT: [[TMP28:%.*]] = load i16, ptr [[TMP24]], align 2 -// CHECK3-NEXT: [[TMP29:%.*]] = sext i16 [[TMP28]] to i32 -// CHECK3-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK3-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 -// CHECK3-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]]) -// CHECK3-NEXT: [[TMP33:%.*]] = trunc i32 [[TMP32]] to i16 -// CHECK3-NEXT: store i16 [[TMP33]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2 -// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr i16, ptr [[TMP24]], i32 1 -// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 -// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP25]], align 4 -// CHECK3-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK3-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK3-NEXT: [[TMP39:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] -// CHECK3-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK3-NEXT: [[TMP42:%.*]] = and i16 [[TMP6]], 1 -// CHECK3-NEXT: [[TMP43:%.*]] = icmp eq i16 [[TMP42]], 0 -// CHECK3-NEXT: [[TMP44:%.*]] = and i1 [[TMP41]], [[TMP43]] -// CHECK3-NEXT: [[TMP45:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK3-NEXT: [[TMP46:%.*]] = and i1 [[TMP44]], [[TMP45]] -// CHECK3-NEXT: [[TMP47:%.*]] = or i1 [[TMP37]], [[TMP40]] -// CHECK3-NEXT: [[TMP48:%.*]] = or i1 [[TMP47]], [[TMP46]] -// CHECK3-NEXT: br i1 [[TMP48]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2 +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK3-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16 +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]]) +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP19]], align 2 +// CHECK3-NEXT: [[TMP23:%.*]] = sext i16 [[TMP22]] to i32 +// CHECK3-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK3-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16 +// CHECK3-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP23]], i16 [[TMP6]], i16 [[TMP25]]) +// CHECK3-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16 +// CHECK3-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 +// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0 +// CHECK3-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] +// CHECK3-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP7]], 2 +// CHECK3-NEXT: [[TMP35:%.*]] = and i16 [[TMP5]], 1 +// CHECK3-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP35]], 0 +// CHECK3-NEXT: [[TMP37:%.*]] = and i1 [[TMP34]], [[TMP36]] +// CHECK3-NEXT: [[TMP38:%.*]] = icmp sgt i16 [[TMP6]], 0 +// CHECK3-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] +// CHECK3-NEXT: [[TMP40:%.*]] = or i1 [[TMP30]], [[TMP33]] +// CHECK3-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]] +// CHECK3-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK3: then: // CHECK3-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]] // CHECK3-NEXT: br label [[IFCONT:%.*]] // CHECK3: else: // CHECK3-NEXT: br label [[IFCONT]] // CHECK3: ifcont: -// CHECK3-NEXT: [[TMP51:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK3-NEXT: [[TMP52:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: [[TMP53:%.*]] = and i1 [[TMP51]], [[TMP52]] -// CHECK3-NEXT: br i1 [[TMP53]], label [[THEN5:%.*]], label [[ELSE6:%.*]] +// CHECK3-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP43:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] +// CHECK3-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]] // CHECK3: then5: -// CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP56:%.*]] = load ptr, ptr [[TMP54]], align 4 -// CHECK3-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP59:%.*]] = load ptr, ptr [[TMP57]], align 4 -// CHECK3-NEXT: [[TMP60:%.*]] = load i32, ptr [[TMP56]], align 4 -// CHECK3-NEXT: store i32 [[TMP60]], ptr [[TMP59]], align 4 -// CHECK3-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP61]], align 4 -// CHECK3-NEXT: [[TMP64:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP66:%.*]] = load ptr, ptr [[TMP64]], align 4 -// CHECK3-NEXT: [[TMP67:%.*]] = load i16, ptr [[TMP63]], align 2 -// CHECK3-NEXT: store i16 [[TMP67]], ptr [[TMP66]], align 2 +// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 4 +// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 4 +// CHECK3-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK3-NEXT: store i32 [[TMP49]], ptr [[TMP48]], align 4 +// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 4 +// CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 4 +// CHECK3-NEXT: [[TMP54:%.*]] = load i16, ptr [[TMP51]], align 2 +// CHECK3-NEXT: store i16 [[TMP54]], ptr [[TMP53]], align 2 // CHECK3-NEXT: br label [[IFCONT7:%.*]] // CHECK3: else6: // CHECK3-NEXT: br label [[IFCONT7]] @@ -3453,7 +3453,7 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) +// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() @@ -3462,56 +3462,56 @@ // CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK3-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 // CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK3-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK3-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK3: then: -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK3-NEXT: store volatile i32 [[TMP12]], ptr addrspace(3) [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store volatile i32 [[TMP10]], ptr addrspace(3) [[TMP9]], align 4 // CHECK3-NEXT: br label [[IFCONT:%.*]] // CHECK3: else: // CHECK3-NEXT: br label [[IFCONT]] // CHECK3: ifcont: -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]] +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]] // CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK3: then2: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load volatile i32, ptr addrspace(3) [[TMP14]], align 4 -// CHECK3-NEXT: store i32 [[TMP18]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load volatile i32, ptr addrspace(3) [[TMP12]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 // CHECK3-NEXT: br label [[IFCONT4:%.*]] // CHECK3: else3: // CHECK3-NEXT: br label [[IFCONT4]] // CHECK3: ifcont4: -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK3-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK3-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]] // CHECK3: then6: -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK3-NEXT: [[TMP24:%.*]] = load i16, ptr [[TMP20]], align 2 -// CHECK3-NEXT: store volatile i16 [[TMP24]], ptr addrspace(3) [[TMP22]], align 2 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP17]], align 2 +// CHECK3-NEXT: store volatile i16 [[TMP19]], ptr addrspace(3) [[TMP18]], align 2 // CHECK3-NEXT: br label [[IFCONT8:%.*]] // CHECK3: else7: // CHECK3-NEXT: br label [[IFCONT8]] // CHECK3: ifcont8: -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP25]] +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]] // CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]] // CHECK3: then10: -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load volatile i16, ptr addrspace(3) [[TMP26]], align 2 -// CHECK3-NEXT: store i16 [[TMP31]], ptr [[TMP29]], align 2 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load volatile i16, ptr addrspace(3) [[TMP21]], align 2 +// CHECK3-NEXT: store i16 [[TMP24]], ptr [[TMP23]], align 2 // CHECK3-NEXT: br label [[IFCONT12:%.*]] // CHECK3: else11: // CHECK3-NEXT: br label [[IFCONT12]] @@ -3529,20 +3529,20 @@ // CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2048 x i32], ptr [[A]], i32 0, i32 [[TMP7]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 128 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP5]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2048 x i16], ptr [[B]], i32 0, i32 [[TMP7]] -// CHECK3-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP14]], align 2 -// CHECK3-NEXT: store i16 [[TMP17]], ptr [[TMP16]], align 128 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x i32], ptr [[A]], i32 0, i32 [[TMP5]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 128 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2048 x i16], ptr [[B]], i32 0, i32 [[TMP5]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP11]], align 2 +// CHECK3-NEXT: store i16 [[TMP13]], ptr [[TMP12]], align 128 // CHECK3-NEXT: ret void // // @@ -3557,17 +3557,17 @@ // CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x i32], ptr [[A]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2048 x i32], ptr [[A]], i32 0, i32 [[TMP4]] +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 // CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2048 x i16], ptr [[B]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP13]]) #[[ATTR4]] +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x i16], ptr [[B]], i32 0, i32 [[TMP4]] +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP9]]) #[[ATTR4]] // CHECK3-NEXT: ret void // // @@ -3581,20 +3581,20 @@ // CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2048 x i32], ptr [[A]], i32 0, i32 [[TMP7]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 128 -// CHECK3-NEXT: store i32 [[TMP12]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP5]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2048 x i16], ptr [[B]], i32 0, i32 [[TMP7]] -// CHECK3-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP16]], align 128 -// CHECK3-NEXT: store i16 [[TMP17]], ptr [[TMP14]], align 2 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x i32], ptr [[A]], i32 0, i32 [[TMP5]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 128 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2048 x i16], ptr [[B]], i32 0, i32 [[TMP5]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 128 +// CHECK3-NEXT: store i16 [[TMP13]], ptr [[TMP11]], align 2 // CHECK3-NEXT: ret void // // @@ -3609,16 +3609,16 @@ // CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x i32], ptr [[A]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2048 x i32], ptr [[A]], i32 0, i32 [[TMP4]] +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 // CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2048 x i16], ptr [[B]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[TMP13]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x i16], ptr [[B]], i32 0, i32 [[TMP4]] +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[TMP9]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] // CHECK3-NEXT: ret void // Index: clang/test/OpenMP/reduction_implicit_map.cpp =================================================================== --- clang/test/OpenMP/reduction_implicit_map.cpp +++ clang/test/OpenMP/reduction_implicit_map.cpp @@ -103,7 +103,7 @@ // CHECK-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 // CHECK-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK: user_code.entry: @@ -112,7 +112,7 @@ // CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK-NEXT: store ptr [[TMP2]], ptr [[TMP3]], align 8 // CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) +// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-NEXT: ret void // CHECK: worker.exit: // CHECK-NEXT: ret void Index: clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c =================================================================== --- clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c +++ clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c @@ -86,7 +86,7 @@ #pragma omp begin declare target device_type(nohost) __attribute__((weak)) extern "C" int __kmpc_target_init(void *Ident, char Mode, - bool UseGenericStateMachine, bool) { // all-remark {{Could not internalize function. Some optimizations may not be possible. [OMP140]}} + bool UseGenericStateMachine) { // all-remark {{Could not internalize function. Some optimizations may not be possible. [OMP140]}} return 0; } #pragma omp end declare target Index: clang/test/OpenMP/remarks_parallel_in_target_state_machine.c =================================================================== --- clang/test/OpenMP/remarks_parallel_in_target_state_machine.c +++ clang/test/OpenMP/remarks_parallel_in_target_state_machine.c @@ -42,7 +42,7 @@ #pragma omp begin declare target device_type(nohost) __attribute__((weak)) extern "C" int __kmpc_target_init(void *Ident, char Mode, - bool UseGenericStateMachine, bool) { // expected-remark {{Could not internalize function. Some optimizations may not be possible. [OMP140]}} + bool UseGenericStateMachine) { // expected-remark {{Could not internalize function. Some optimizations may not be possible. [OMP140]}} return 0; } #pragma omp end declare target Index: clang/test/OpenMP/target_globals_codegen.cpp =================================================================== --- clang/test/OpenMP/target_globals_codegen.cpp +++ clang/test/OpenMP/target_globals_codegen.cpp @@ -8,8 +8,8 @@ // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-assume-teams-oversubscription -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-TEAMS // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-assume-no-thread-state -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-STATE // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-assume-no-nested-parallelism -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-NESTED -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -nogpulib -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-RUNTIME -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-assume-teams-oversubscription -fopenmp-is-device -o - | FileCheck %s --check-prefix=CHECK-RUNTIME +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -nogpulib -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-RUNTIME1 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-assume-teams-oversubscription -fopenmp-is-device -o - | FileCheck %s --check-prefix=CHECK-RUNTIME2 // expected-no-diagnostics #ifndef HEADER @@ -21,48 +21,75 @@ // CHECK: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0 // CHECK: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0 // CHECK: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0 +// CHECK: @0 = private unnamed_addr constant [23 x i8] c" +// CHECK: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8 +// CHECK: @__omp_offloading_16_16ffc5f__Z3foov_l95_exec_mode = weak protected constant i8 1 +// CHECK: @llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_16_16ffc5f__Z3foov_l95_exec_mode], section "llvm.metadata" //. // CHECK-EQ: @__omp_rtl_debug_kind = weak_odr hidden constant i32 111 // CHECK-EQ: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0 // CHECK-EQ: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0 // CHECK-EQ: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0 // CHECK-EQ: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0 +// CHECK-EQ: @0 = private unnamed_addr constant [23 x i8] c" +// CHECK-EQ: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8 +// CHECK-EQ: @__omp_offloading_16_16ffc5f__Z3foov_l95_exec_mode = weak protected constant i8 1 +// CHECK-EQ: @llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_16_16ffc5f__Z3foov_l95_exec_mode], section "llvm.metadata" //. // CHECK-DEFAULT: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0 // CHECK-DEFAULT: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0 // CHECK-DEFAULT: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0 // CHECK-DEFAULT: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0 // CHECK-DEFAULT: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0 +// CHECK-DEFAULT: @0 = private unnamed_addr constant [23 x i8] c" +// CHECK-DEFAULT: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8 +// CHECK-DEFAULT: @__omp_offloading_16_16ffc5f__Z3foov_l95_exec_mode = weak protected constant i8 1 +// CHECK-DEFAULT: @llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_16_16ffc5f__Z3foov_l95_exec_mode], section "llvm.metadata" //. // CHECK-THREADS: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0 // CHECK-THREADS: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0 // CHECK-THREADS: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 1 // CHECK-THREADS: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0 // CHECK-THREADS: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0 +// CHECK-THREADS: @0 = private unnamed_addr constant [23 x i8] c" +// CHECK-THREADS: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8 +// CHECK-THREADS: @__omp_offloading_16_16ffc5f__Z3foov_l95_exec_mode = weak protected constant i8 1 +// CHECK-THREADS: @llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_16_16ffc5f__Z3foov_l95_exec_mode], section "llvm.metadata" //. // CHECK-TEAMS: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0 // CHECK-TEAMS: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 1 // CHECK-TEAMS: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0 // CHECK-TEAMS: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0 // CHECK-TEAMS: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0 +// CHECK-TEAMS: @0 = private unnamed_addr constant [23 x i8] c" +// CHECK-TEAMS: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8 +// CHECK-TEAMS: @__omp_offloading_16_16ffc5f__Z3foov_l95_exec_mode = weak protected constant i8 1 +// CHECK-TEAMS: @llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_16_16ffc5f__Z3foov_l95_exec_mode], section "llvm.metadata" //. // CHECK-STATE: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0 // CHECK-STATE: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0 // CHECK-STATE: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0 // CHECK-STATE: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 1 // CHECK-STATE: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0 +// CHECK-STATE: @0 = private unnamed_addr constant [23 x i8] c" +// CHECK-STATE: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8 +// CHECK-STATE: @__omp_offloading_16_16ffc5f__Z3foov_l95_exec_mode = weak protected constant i8 1 +// CHECK-STATE: @llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_16_16ffc5f__Z3foov_l95_exec_mode], section "llvm.metadata" //. // CHECK-NESTED: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0 // CHECK-NESTED: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0 // CHECK-NESTED: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0 // CHECK-NESTED: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0 // CHECK-NESTED: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 1 +// CHECK-NESTED: @0 = private unnamed_addr constant [23 x i8] c" +// CHECK-NESTED: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8 +// CHECK-NESTED: @__omp_offloading_16_16ffc5f__Z3foov_l95_exec_mode = weak protected constant i8 1 +// CHECK-NESTED: @llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_16_16ffc5f__Z3foov_l95_exec_mode], section "llvm.metadata" //. -// CHECK-RUNTIME-NOT: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0 -// CHECK-RUNTIME-NOT: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 1 -// CHECK-RUNTIME-NOT: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0 -// CHECK-RUNTIME-NOT: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0 -// CHECK-RUNTIME-NOT: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0 +// CHECK-RUNTIME1: @0 = private unnamed_addr constant [23 x i8] c" +// CHECK-RUNTIME1: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8 +// CHECK-RUNTIME1: @__omp_offloading_16_16ffc5f__Z3foov_l95_exec_mode = weak protected constant i8 1 +// CHECK-RUNTIME1: @llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_16_16ffc5f__Z3foov_l95_exec_mode], section "llvm.metadata" //. void foo() { #pragma omp target @@ -70,3 +97,95 @@ } #endif +// CHECK-RUNTIME: !0 = !{i32 1, !"wchar_size", i32 4} +// CHECK-RUNTIME: !1 = !{i32 7, !"openmp", i32 50} +// CHECK-RUNTIME: !2 = !{i32 7, !"openmp-device", i32 50} +// CHECK-RUNTIME: !3 = !{!"clang version 16.0.0"} +//. +// CHECK: attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +//. +// CHECK-EQ: attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +//. +// CHECK-DEFAULT: attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +//. +// CHECK-THREADS: attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +//. +// CHECK-TEAMS: attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +//. +// CHECK-STATE: attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +//. +// CHECK-NESTED: attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +//. +// CHECK-RUNTIME1: attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +//. +// CHECK: !0 = !{i32 0, i32 22, i32 24116319, !"_Z3foov", i32 95, i32 0, i32 0} +// CHECK: !1 = !{ptr @__omp_offloading_16_16ffc5f__Z3foov_l95, !"kernel", i32 1} +// CHECK: !2 = !{i32 1, !"wchar_size", i32 4} +// CHECK: !3 = !{i32 7, !"openmp", i32 50} +// CHECK: !4 = !{i32 7, !"openmp-device", i32 50} +// CHECK: !5 = !{!"clang version 16.0.0"} +//. +// CHECK-EQ: !0 = !{i32 0, i32 22, i32 24116319, !"_Z3foov", i32 95, i32 0, i32 0} +// CHECK-EQ: !1 = !{ptr @__omp_offloading_16_16ffc5f__Z3foov_l95, !"kernel", i32 1} +// CHECK-EQ: !2 = !{i32 1, !"wchar_size", i32 4} +// CHECK-EQ: !3 = !{i32 7, !"openmp", i32 50} +// CHECK-EQ: !4 = !{i32 7, !"openmp-device", i32 50} +// CHECK-EQ: !5 = !{!"clang version 16.0.0"} +//. +// CHECK-DEFAULT: !0 = !{i32 0, i32 22, i32 24116319, !"_Z3foov", i32 95, i32 0, i32 0} +// CHECK-DEFAULT: !1 = !{ptr @__omp_offloading_16_16ffc5f__Z3foov_l95, !"kernel", i32 1} +// CHECK-DEFAULT: !2 = !{i32 1, !"wchar_size", i32 4} +// CHECK-DEFAULT: !3 = !{i32 7, !"openmp", i32 50} +// CHECK-DEFAULT: !4 = !{i32 7, !"openmp-device", i32 50} +// CHECK-DEFAULT: !5 = !{!"clang version 16.0.0"} +//. +// CHECK-THREADS: !0 = !{i32 0, i32 22, i32 24116319, !"_Z3foov", i32 95, i32 0, i32 0} +// CHECK-THREADS: !1 = !{ptr @__omp_offloading_16_16ffc5f__Z3foov_l95, !"kernel", i32 1} +// CHECK-THREADS: !2 = !{i32 1, !"wchar_size", i32 4} +// CHECK-THREADS: !3 = !{i32 7, !"openmp", i32 50} +// CHECK-THREADS: !4 = !{i32 7, !"openmp-device", i32 50} +// CHECK-THREADS: !5 = !{!"clang version 16.0.0"} +//. +// CHECK-TEAMS: !0 = !{i32 0, i32 22, i32 24116319, !"_Z3foov", i32 95, i32 0, i32 0} +// CHECK-TEAMS: !1 = !{ptr @__omp_offloading_16_16ffc5f__Z3foov_l95, !"kernel", i32 1} +// CHECK-TEAMS: !2 = !{i32 1, !"wchar_size", i32 4} +// CHECK-TEAMS: !3 = !{i32 7, !"openmp", i32 50} +// CHECK-TEAMS: !4 = !{i32 7, !"openmp-device", i32 50} +// CHECK-TEAMS: !5 = !{!"clang version 16.0.0"} +//. +// CHECK-STATE: !0 = !{i32 0, i32 22, i32 24116319, !"_Z3foov", i32 95, i32 0, i32 0} +// CHECK-STATE: !1 = !{ptr @__omp_offloading_16_16ffc5f__Z3foov_l95, !"kernel", i32 1} +// CHECK-STATE: !2 = !{i32 1, !"wchar_size", i32 4} +// CHECK-STATE: !3 = !{i32 7, !"openmp", i32 50} +// CHECK-STATE: !4 = !{i32 7, !"openmp-device", i32 50} +// CHECK-STATE: !5 = !{!"clang version 16.0.0"} +//. +// CHECK-NESTED: !0 = !{i32 0, i32 22, i32 24116319, !"_Z3foov", i32 95, i32 0, i32 0} +// CHECK-NESTED: !1 = !{ptr @__omp_offloading_16_16ffc5f__Z3foov_l95, !"kernel", i32 1} +// CHECK-NESTED: !2 = !{i32 1, !"wchar_size", i32 4} +// CHECK-NESTED: !3 = !{i32 7, !"openmp", i32 50} +// CHECK-NESTED: !4 = !{i32 7, !"openmp-device", i32 50} +// CHECK-NESTED: !5 = !{!"clang version 16.0.0"} +//. +// CHECK-RUNTIME1: !0 = !{i32 0, i32 22, i32 24116319, !"_Z3foov", i32 95, i32 0, i32 0} +// CHECK-RUNTIME1: !1 = !{ptr @__omp_offloading_16_16ffc5f__Z3foov_l95, !"kernel", i32 1} +// CHECK-RUNTIME1: !2 = !{i32 1, !"wchar_size", i32 4} +// CHECK-RUNTIME1: !3 = !{i32 7, !"openmp", i32 50} +// CHECK-RUNTIME1: !4 = !{i32 7, !"openmp-device", i32 50} +// CHECK-RUNTIME1: !5 = !{!"clang version 16.0.0"} +//. +// CHECK-RUNTIME2: !0 = !{i32 1, !"wchar_size", i32 4} +// CHECK-RUNTIME2: !1 = !{i32 7, !"openmp", i32 50} +// CHECK-RUNTIME2: !2 = !{i32 7, !"openmp-device", i32 50} +// CHECK-RUNTIME2: !3 = !{!"clang version 16.0.0"} +//. +//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +// CHECK: {{.*}} +// CHECK-DEFAULT: {{.*}} +// CHECK-EQ: {{.*}} +// CHECK-NESTED: {{.*}} +// CHECK-RUNTIME1: {{.*}} +// CHECK-RUNTIME2: {{.*}} +// CHECK-STATE: {{.*}} +// CHECK-TEAMS: {{.*}} +// CHECK-THREADS: {{.*}} Index: clang/test/OpenMP/target_has_device_addr_codegen.cpp =================================================================== --- clang/test/OpenMP/target_has_device_addr_codegen.cpp +++ clang/test/OpenMP/target_has_device_addr_codegen.cpp @@ -350,184 +350,184 @@ // CHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[Z]], align 8 // CHECK-NEXT: store ptr [[TMP17]], ptr [[TMP]], align 8 // CHECK-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP19]], align 8 +// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP20]], align 8 -// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP21]], align 8 -// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0 -// CHECK-NEXT: store ptr null, ptr [[TMP22]], align 8 -// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0 +// CHECK-NEXT: store ptr null, ptr [[TMP21]], align 8 +// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 +// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 +// CHECK-NEXT: store i32 1, ptr [[TMP24]], align 4 +// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 // CHECK-NEXT: store i32 1, ptr [[TMP25]], align 4 -// CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 -// CHECK-NEXT: store i32 1, ptr [[TMP26]], align 4 -// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 +// CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[TMP22]], ptr [[TMP26]], align 8 +// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 // CHECK-NEXT: store ptr [[TMP23]], ptr [[TMP27]], align 8 -// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 -// CHECK-NEXT: store ptr [[TMP24]], ptr [[TMP28]], align 8 -// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 -// CHECK-NEXT: store ptr @.offload_sizes.5, ptr [[TMP29]], align 8 -// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 -// CHECK-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP30]], align 8 -// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 +// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 +// CHECK-NEXT: store ptr @.offload_sizes.5, ptr [[TMP28]], align 8 +// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 +// CHECK-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP29]], align 8 +// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 +// CHECK-NEXT: store ptr null, ptr [[TMP30]], align 8 +// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 // CHECK-NEXT: store ptr null, ptr [[TMP31]], align 8 -// CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 -// CHECK-NEXT: store ptr null, ptr [[TMP32]], align 8 -// CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 -// CHECK-NEXT: store i64 0, ptr [[TMP33]], align 8 -// CHECK-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, ptr [[KERNEL_ARGS4]]) -// CHECK-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] +// CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 +// CHECK-NEXT: store i64 0, ptr [[TMP32]], align 8 +// CHECK-NEXT: [[TMP33:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, ptr [[KERNEL_ARGS4]]) +// CHECK-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK-NEXT: br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] // CHECK: omp_offload.failed5: // CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147(ptr [[TMP18]]) #[[ATTR5]] // CHECK-NEXT: br label [[OMP_OFFLOAD_CONT6]] // CHECK: omp_offload.cont6: -// CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[AA]], ptr [[TMP35]], align 8 +// CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 // CHECK-NEXT: store ptr [[AA]], ptr [[TMP36]], align 8 -// CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[AA]], ptr [[TMP37]], align 8 -// CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 0 -// CHECK-NEXT: store ptr null, ptr [[TMP38]], align 8 -// CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 -// CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 +// CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 0 +// CHECK-NEXT: store ptr null, ptr [[TMP37]], align 8 +// CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 // CHECK-NEXT: [[KERNEL_ARGS10:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0 +// CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0 +// CHECK-NEXT: store i32 1, ptr [[TMP40]], align 4 +// CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1 // CHECK-NEXT: store i32 1, ptr [[TMP41]], align 4 -// CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1 -// CHECK-NEXT: store i32 1, ptr [[TMP42]], align 4 -// CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2 +// CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[TMP38]], ptr [[TMP42]], align 8 +// CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 3 // CHECK-NEXT: store ptr [[TMP39]], ptr [[TMP43]], align 8 -// CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 3 -// CHECK-NEXT: store ptr [[TMP40]], ptr [[TMP44]], align 8 -// CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 4 -// CHECK-NEXT: store ptr @.offload_sizes.7, ptr [[TMP45]], align 8 -// CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 5 -// CHECK-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP46]], align 8 -// CHECK-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 6 +// CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 4 +// CHECK-NEXT: store ptr @.offload_sizes.7, ptr [[TMP44]], align 8 +// CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 5 +// CHECK-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP45]], align 8 +// CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 6 +// CHECK-NEXT: store ptr null, ptr [[TMP46]], align 8 +// CHECK-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 7 // CHECK-NEXT: store ptr null, ptr [[TMP47]], align 8 -// CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 7 -// CHECK-NEXT: store ptr null, ptr [[TMP48]], align 8 -// CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 8 -// CHECK-NEXT: store i64 0, ptr [[TMP49]], align 8 -// CHECK-NEXT: [[TMP50:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l149.region_id, ptr [[KERNEL_ARGS10]]) -// CHECK-NEXT: [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0 -// CHECK-NEXT: br i1 [[TMP51]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]] +// CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 8 +// CHECK-NEXT: store i64 0, ptr [[TMP48]], align 8 +// CHECK-NEXT: [[TMP49:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l149.region_id, ptr [[KERNEL_ARGS10]]) +// CHECK-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 +// CHECK-NEXT: br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]] // CHECK: omp_offload.failed11: // CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l149(ptr [[AA]]) #[[ATTR5]] // CHECK-NEXT: br label [[OMP_OFFLOAD_CONT12]] // CHECK: omp_offload.cont12: -// CHECK-NEXT: [[TMP52:%.*]] = load ptr, ptr [[RAA]], align 8 -// CHECK-NEXT: store ptr [[TMP52]], ptr [[_TMP13]], align 8 -// CHECK-NEXT: [[TMP53:%.*]] = load ptr, ptr [[_TMP13]], align 8 -// CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[TMP53]], ptr [[TMP55]], align 8 -// CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS15]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[TMP53]], ptr [[TMP56]], align 8 -// CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS16]], i64 0, i64 0 -// CHECK-NEXT: store ptr null, ptr [[TMP57]], align 8 -// CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0 -// CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS15]], i32 0, i32 0 +// CHECK-NEXT: [[TMP51:%.*]] = load ptr, ptr [[RAA]], align 8 +// CHECK-NEXT: store ptr [[TMP51]], ptr [[_TMP13]], align 8 +// CHECK-NEXT: [[TMP52:%.*]] = load ptr, ptr [[_TMP13]], align 8 +// CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[TMP52]], ptr [[TMP53]], align 8 +// CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS15]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[TMP52]], ptr [[TMP54]], align 8 +// CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS16]], i64 0, i64 0 +// CHECK-NEXT: store ptr null, ptr [[TMP55]], align 8 +// CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0 +// CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS15]], i32 0, i32 0 // CHECK-NEXT: [[KERNEL_ARGS17:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 0 -// CHECK-NEXT: store i32 1, ptr [[TMP60]], align 4 -// CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 1 -// CHECK-NEXT: store i32 1, ptr [[TMP61]], align 4 -// CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 2 -// CHECK-NEXT: store ptr [[TMP58]], ptr [[TMP62]], align 8 -// CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 3 -// CHECK-NEXT: store ptr [[TMP59]], ptr [[TMP63]], align 8 -// CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 4 -// CHECK-NEXT: store ptr @.offload_sizes.9, ptr [[TMP64]], align 8 -// CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 5 -// CHECK-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP65]], align 8 -// CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 6 -// CHECK-NEXT: store ptr null, ptr [[TMP66]], align 8 -// CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 7 -// CHECK-NEXT: store ptr null, ptr [[TMP67]], align 8 -// CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 8 -// CHECK-NEXT: store i64 0, ptr [[TMP68]], align 8 -// CHECK-NEXT: [[TMP69:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, ptr [[KERNEL_ARGS17]]) -// CHECK-NEXT: [[TMP70:%.*]] = icmp ne i32 [[TMP69]], 0 -// CHECK-NEXT: br i1 [[TMP70]], label [[OMP_OFFLOAD_FAILED18:%.*]], label [[OMP_OFFLOAD_CONT19:%.*]] +// CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 0 +// CHECK-NEXT: store i32 1, ptr [[TMP58]], align 4 +// CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 1 +// CHECK-NEXT: store i32 1, ptr [[TMP59]], align 4 +// CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[TMP56]], ptr [[TMP60]], align 8 +// CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 3 +// CHECK-NEXT: store ptr [[TMP57]], ptr [[TMP61]], align 8 +// CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 4 +// CHECK-NEXT: store ptr @.offload_sizes.9, ptr [[TMP62]], align 8 +// CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 5 +// CHECK-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP63]], align 8 +// CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 6 +// CHECK-NEXT: store ptr null, ptr [[TMP64]], align 8 +// CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 7 +// CHECK-NEXT: store ptr null, ptr [[TMP65]], align 8 +// CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 8 +// CHECK-NEXT: store i64 0, ptr [[TMP66]], align 8 +// CHECK-NEXT: [[TMP67:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, ptr [[KERNEL_ARGS17]]) +// CHECK-NEXT: [[TMP68:%.*]] = icmp ne i32 [[TMP67]], 0 +// CHECK-NEXT: br i1 [[TMP68]], label [[OMP_OFFLOAD_FAILED18:%.*]], label [[OMP_OFFLOAD_CONT19:%.*]] // CHECK: omp_offload.failed18: -// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(ptr [[TMP53]]) #[[ATTR5]] +// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(ptr [[TMP52]]) #[[ATTR5]] // CHECK-NEXT: br label [[OMP_OFFLOAD_CONT19]] // CHECK: omp_offload.cont19: -// CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[H]], ptr [[TMP71]], align 8 -// CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[H]], ptr [[TMP72]], align 8 -// CHECK-NEXT: [[TMP73:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 0 -// CHECK-NEXT: store ptr null, ptr [[TMP73]], align 8 -// CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0 -// CHECK-NEXT: [[TMP75:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0 +// CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[H]], ptr [[TMP69]], align 8 +// CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[H]], ptr [[TMP70]], align 8 +// CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 0 +// CHECK-NEXT: store ptr null, ptr [[TMP71]], align 8 +// CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0 +// CHECK-NEXT: [[TMP73:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0 // CHECK-NEXT: [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0 -// CHECK-NEXT: store i32 1, ptr [[TMP76]], align 4 -// CHECK-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1 -// CHECK-NEXT: store i32 1, ptr [[TMP77]], align 4 -// CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2 -// CHECK-NEXT: store ptr [[TMP74]], ptr [[TMP78]], align 8 -// CHECK-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 3 -// CHECK-NEXT: store ptr [[TMP75]], ptr [[TMP79]], align 8 -// CHECK-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 4 -// CHECK-NEXT: store ptr @.offload_sizes.11, ptr [[TMP80]], align 8 -// CHECK-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 5 -// CHECK-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP81]], align 8 -// CHECK-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 6 -// CHECK-NEXT: store ptr null, ptr [[TMP82]], align 8 -// CHECK-NEXT: [[TMP83:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 7 -// CHECK-NEXT: store ptr null, ptr [[TMP83]], align 8 -// CHECK-NEXT: [[TMP84:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 8 -// CHECK-NEXT: store i64 0, ptr [[TMP84]], align 8 -// CHECK-NEXT: [[TMP85:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153.region_id, ptr [[KERNEL_ARGS23]]) -// CHECK-NEXT: [[TMP86:%.*]] = icmp ne i32 [[TMP85]], 0 -// CHECK-NEXT: br i1 [[TMP86]], label [[OMP_OFFLOAD_FAILED24:%.*]], label [[OMP_OFFLOAD_CONT25:%.*]] +// CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0 +// CHECK-NEXT: store i32 1, ptr [[TMP74]], align 4 +// CHECK-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1 +// CHECK-NEXT: store i32 1, ptr [[TMP75]], align 4 +// CHECK-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[TMP72]], ptr [[TMP76]], align 8 +// CHECK-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 3 +// CHECK-NEXT: store ptr [[TMP73]], ptr [[TMP77]], align 8 +// CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 4 +// CHECK-NEXT: store ptr @.offload_sizes.11, ptr [[TMP78]], align 8 +// CHECK-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 5 +// CHECK-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP79]], align 8 +// CHECK-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 6 +// CHECK-NEXT: store ptr null, ptr [[TMP80]], align 8 +// CHECK-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 7 +// CHECK-NEXT: store ptr null, ptr [[TMP81]], align 8 +// CHECK-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 8 +// CHECK-NEXT: store i64 0, ptr [[TMP82]], align 8 +// CHECK-NEXT: [[TMP83:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153.region_id, ptr [[KERNEL_ARGS23]]) +// CHECK-NEXT: [[TMP84:%.*]] = icmp ne i32 [[TMP83]], 0 +// CHECK-NEXT: br i1 [[TMP84]], label [[OMP_OFFLOAD_FAILED24:%.*]], label [[OMP_OFFLOAD_CONT25:%.*]] // CHECK: omp_offload.failed24: // CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153(ptr [[H]]) #[[ATTR5]] // CHECK-NEXT: br label [[OMP_OFFLOAD_CONT25]] // CHECK: omp_offload.cont25: -// CHECK-NEXT: [[TMP87:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[DA]], ptr [[TMP87]], align 8 -// CHECK-NEXT: [[TMP88:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS27]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[DA]], ptr [[TMP88]], align 8 -// CHECK-NEXT: [[TMP89:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 0 -// CHECK-NEXT: store ptr null, ptr [[TMP89]], align 8 -// CHECK-NEXT: [[TMP90:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0 -// CHECK-NEXT: [[TMP91:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS27]], i32 0, i32 0 +// CHECK-NEXT: [[TMP85:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[DA]], ptr [[TMP85]], align 8 +// CHECK-NEXT: [[TMP86:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS27]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[DA]], ptr [[TMP86]], align 8 +// CHECK-NEXT: [[TMP87:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 0 +// CHECK-NEXT: store ptr null, ptr [[TMP87]], align 8 +// CHECK-NEXT: [[TMP88:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0 +// CHECK-NEXT: [[TMP89:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS27]], i32 0, i32 0 // CHECK-NEXT: [[KERNEL_ARGS29:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK-NEXT: [[TMP92:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 0 -// CHECK-NEXT: store i32 1, ptr [[TMP92]], align 4 -// CHECK-NEXT: [[TMP93:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 1 -// CHECK-NEXT: store i32 1, ptr [[TMP93]], align 4 -// CHECK-NEXT: [[TMP94:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 2 -// CHECK-NEXT: store ptr [[TMP90]], ptr [[TMP94]], align 8 -// CHECK-NEXT: [[TMP95:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 3 -// CHECK-NEXT: store ptr [[TMP91]], ptr [[TMP95]], align 8 -// CHECK-NEXT: [[TMP96:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 4 -// CHECK-NEXT: store ptr @.offload_sizes.13, ptr [[TMP96]], align 8 -// CHECK-NEXT: [[TMP97:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 5 -// CHECK-NEXT: store ptr @.offload_maptypes.14, ptr [[TMP97]], align 8 -// CHECK-NEXT: [[TMP98:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 6 -// CHECK-NEXT: store ptr null, ptr [[TMP98]], align 8 -// CHECK-NEXT: [[TMP99:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 7 -// CHECK-NEXT: store ptr null, ptr [[TMP99]], align 8 -// CHECK-NEXT: [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 8 -// CHECK-NEXT: store i64 0, ptr [[TMP100]], align 8 -// CHECK-NEXT: [[TMP101:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, ptr [[KERNEL_ARGS29]]) -// CHECK-NEXT: [[TMP102:%.*]] = icmp ne i32 [[TMP101]], 0 -// CHECK-NEXT: br i1 [[TMP102]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]] +// CHECK-NEXT: [[TMP90:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 0 +// CHECK-NEXT: store i32 1, ptr [[TMP90]], align 4 +// CHECK-NEXT: [[TMP91:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 1 +// CHECK-NEXT: store i32 1, ptr [[TMP91]], align 4 +// CHECK-NEXT: [[TMP92:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[TMP88]], ptr [[TMP92]], align 8 +// CHECK-NEXT: [[TMP93:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 3 +// CHECK-NEXT: store ptr [[TMP89]], ptr [[TMP93]], align 8 +// CHECK-NEXT: [[TMP94:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 4 +// CHECK-NEXT: store ptr @.offload_sizes.13, ptr [[TMP94]], align 8 +// CHECK-NEXT: [[TMP95:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 5 +// CHECK-NEXT: store ptr @.offload_maptypes.14, ptr [[TMP95]], align 8 +// CHECK-NEXT: [[TMP96:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 6 +// CHECK-NEXT: store ptr null, ptr [[TMP96]], align 8 +// CHECK-NEXT: [[TMP97:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 7 +// CHECK-NEXT: store ptr null, ptr [[TMP97]], align 8 +// CHECK-NEXT: [[TMP98:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 8 +// CHECK-NEXT: store i64 0, ptr [[TMP98]], align 8 +// CHECK-NEXT: [[TMP99:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, ptr [[KERNEL_ARGS29]]) +// CHECK-NEXT: [[TMP100:%.*]] = icmp ne i32 [[TMP99]], 0 +// CHECK-NEXT: br i1 [[TMP100]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]] // CHECK: omp_offload.failed30: // CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155(ptr [[DA]]) #[[ATTR5]] // CHECK-NEXT: br label [[OMP_OFFLOAD_CONT31]] // CHECK: omp_offload.cont31: -// CHECK-NEXT: [[TMP103:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiET_S0_(i32 noundef signext [[TMP103]]) +// CHECK-NEXT: [[TMP101:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiET_S0_(i32 noundef signext [[TMP101]]) // CHECK-NEXT: [[CALL32:%.*]] = call noundef ptr @_Z5tmainIPiET_S1_(ptr noundef [[ARGC_ADDR]]) -// CHECK-NEXT: [[TMP104:%.*]] = load i32, ptr [[CALL32]], align 4 -// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[TMP104]] +// CHECK-NEXT: [[TMP102:%.*]] = load i32, ptr [[CALL32]], align 4 +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[TMP102]] // CHECK-NEXT: ret i32 [[ADD]] // // @@ -681,104 +681,104 @@ // CHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[Z]], align 8 // CHECK-NEXT: store ptr [[TMP17]], ptr [[TMP]], align 8 // CHECK-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP19]], align 8 +// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP20]], align 8 -// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP21]], align 8 -// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0 -// CHECK-NEXT: store ptr null, ptr [[TMP22]], align 8 -// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0 +// CHECK-NEXT: store ptr null, ptr [[TMP21]], align 8 +// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 +// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 +// CHECK-NEXT: store i32 1, ptr [[TMP24]], align 4 +// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 // CHECK-NEXT: store i32 1, ptr [[TMP25]], align 4 -// CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 -// CHECK-NEXT: store i32 1, ptr [[TMP26]], align 4 -// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 +// CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[TMP22]], ptr [[TMP26]], align 8 +// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 // CHECK-NEXT: store ptr [[TMP23]], ptr [[TMP27]], align 8 -// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 -// CHECK-NEXT: store ptr [[TMP24]], ptr [[TMP28]], align 8 -// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 -// CHECK-NEXT: store ptr @.offload_sizes.17, ptr [[TMP29]], align 8 -// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 -// CHECK-NEXT: store ptr @.offload_maptypes.18, ptr [[TMP30]], align 8 -// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 +// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 +// CHECK-NEXT: store ptr @.offload_sizes.17, ptr [[TMP28]], align 8 +// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 +// CHECK-NEXT: store ptr @.offload_maptypes.18, ptr [[TMP29]], align 8 +// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 +// CHECK-NEXT: store ptr null, ptr [[TMP30]], align 8 +// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 // CHECK-NEXT: store ptr null, ptr [[TMP31]], align 8 -// CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 -// CHECK-NEXT: store ptr null, ptr [[TMP32]], align 8 -// CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 -// CHECK-NEXT: store i64 0, ptr [[TMP33]], align 8 -// CHECK-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_S0__l125.region_id, ptr [[KERNEL_ARGS4]]) -// CHECK-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] +// CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 +// CHECK-NEXT: store i64 0, ptr [[TMP32]], align 8 +// CHECK-NEXT: [[TMP33:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_S0__l125.region_id, ptr [[KERNEL_ARGS4]]) +// CHECK-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK-NEXT: br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] // CHECK: omp_offload.failed5: // CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_S0__l125(ptr [[TMP18]]) #[[ATTR5]] // CHECK-NEXT: br label [[OMP_OFFLOAD_CONT6]] // CHECK: omp_offload.cont6: -// CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[AA]], ptr [[TMP35]], align 8 +// CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 // CHECK-NEXT: store ptr [[AA]], ptr [[TMP36]], align 8 -// CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[AA]], ptr [[TMP37]], align 8 -// CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 0 -// CHECK-NEXT: store ptr null, ptr [[TMP38]], align 8 -// CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 -// CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 +// CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 0 +// CHECK-NEXT: store ptr null, ptr [[TMP37]], align 8 +// CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 // CHECK-NEXT: [[KERNEL_ARGS10:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0 +// CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0 +// CHECK-NEXT: store i32 1, ptr [[TMP40]], align 4 +// CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1 // CHECK-NEXT: store i32 1, ptr [[TMP41]], align 4 -// CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1 -// CHECK-NEXT: store i32 1, ptr [[TMP42]], align 4 -// CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2 +// CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[TMP38]], ptr [[TMP42]], align 8 +// CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 3 // CHECK-NEXT: store ptr [[TMP39]], ptr [[TMP43]], align 8 -// CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 3 -// CHECK-NEXT: store ptr [[TMP40]], ptr [[TMP44]], align 8 -// CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 4 -// CHECK-NEXT: store ptr @.offload_sizes.19, ptr [[TMP45]], align 8 -// CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 5 -// CHECK-NEXT: store ptr @.offload_maptypes.20, ptr [[TMP46]], align 8 -// CHECK-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 6 +// CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 4 +// CHECK-NEXT: store ptr @.offload_sizes.19, ptr [[TMP44]], align 8 +// CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 5 +// CHECK-NEXT: store ptr @.offload_maptypes.20, ptr [[TMP45]], align 8 +// CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 6 +// CHECK-NEXT: store ptr null, ptr [[TMP46]], align 8 +// CHECK-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 7 // CHECK-NEXT: store ptr null, ptr [[TMP47]], align 8 -// CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 7 -// CHECK-NEXT: store ptr null, ptr [[TMP48]], align 8 -// CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 8 -// CHECK-NEXT: store i64 0, ptr [[TMP49]], align 8 -// CHECK-NEXT: [[TMP50:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_S0__l127.region_id, ptr [[KERNEL_ARGS10]]) -// CHECK-NEXT: [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0 -// CHECK-NEXT: br i1 [[TMP51]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]] +// CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 8 +// CHECK-NEXT: store i64 0, ptr [[TMP48]], align 8 +// CHECK-NEXT: [[TMP49:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_S0__l127.region_id, ptr [[KERNEL_ARGS10]]) +// CHECK-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 +// CHECK-NEXT: br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]] // CHECK: omp_offload.failed11: // CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_S0__l127(ptr [[AA]]) #[[ATTR5]] // CHECK-NEXT: br label [[OMP_OFFLOAD_CONT12]] // CHECK: omp_offload.cont12: -// CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 +// CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[H]], ptr [[TMP51]], align 8 +// CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 // CHECK-NEXT: store ptr [[H]], ptr [[TMP52]], align 8 -// CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[H]], ptr [[TMP53]], align 8 -// CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS15]], i64 0, i64 0 -// CHECK-NEXT: store ptr null, ptr [[TMP54]], align 8 -// CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 -// CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 +// CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS15]], i64 0, i64 0 +// CHECK-NEXT: store ptr null, ptr [[TMP53]], align 8 +// CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 +// CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 // CHECK-NEXT: [[KERNEL_ARGS16:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0 +// CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0 +// CHECK-NEXT: store i32 1, ptr [[TMP56]], align 4 +// CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1 // CHECK-NEXT: store i32 1, ptr [[TMP57]], align 4 -// CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1 -// CHECK-NEXT: store i32 1, ptr [[TMP58]], align 4 -// CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 2 +// CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[TMP54]], ptr [[TMP58]], align 8 +// CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 3 // CHECK-NEXT: store ptr [[TMP55]], ptr [[TMP59]], align 8 -// CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 3 -// CHECK-NEXT: store ptr [[TMP56]], ptr [[TMP60]], align 8 -// CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 4 -// CHECK-NEXT: store ptr @.offload_sizes.21, ptr [[TMP61]], align 8 -// CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 5 -// CHECK-NEXT: store ptr @.offload_maptypes.22, ptr [[TMP62]], align 8 -// CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 6 +// CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 4 +// CHECK-NEXT: store ptr @.offload_sizes.21, ptr [[TMP60]], align 8 +// CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 5 +// CHECK-NEXT: store ptr @.offload_maptypes.22, ptr [[TMP61]], align 8 +// CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 6 +// CHECK-NEXT: store ptr null, ptr [[TMP62]], align 8 +// CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 7 // CHECK-NEXT: store ptr null, ptr [[TMP63]], align 8 -// CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 7 -// CHECK-NEXT: store ptr null, ptr [[TMP64]], align 8 -// CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 8 -// CHECK-NEXT: store i64 0, ptr [[TMP65]], align 8 -// CHECK-NEXT: [[TMP66:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_S0__l129.region_id, ptr [[KERNEL_ARGS16]]) -// CHECK-NEXT: [[TMP67:%.*]] = icmp ne i32 [[TMP66]], 0 -// CHECK-NEXT: br i1 [[TMP67]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]] +// CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 8 +// CHECK-NEXT: store i64 0, ptr [[TMP64]], align 8 +// CHECK-NEXT: [[TMP65:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_S0__l129.region_id, ptr [[KERNEL_ARGS16]]) +// CHECK-NEXT: [[TMP66:%.*]] = icmp ne i32 [[TMP65]], 0 +// CHECK-NEXT: br i1 [[TMP66]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]] // CHECK: omp_offload.failed17: // CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_S0__l129(ptr [[H]]) #[[ATTR5]] // CHECK-NEXT: br label [[OMP_OFFLOAD_CONT18]] @@ -855,104 +855,104 @@ // CHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[Z]], align 8 // CHECK-NEXT: store ptr [[TMP17]], ptr [[TMP]], align 8 // CHECK-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP19]], align 8 +// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP20]], align 8 -// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP21]], align 8 -// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0 -// CHECK-NEXT: store ptr null, ptr [[TMP22]], align 8 -// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0 +// CHECK-NEXT: store ptr null, ptr [[TMP21]], align 8 +// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 +// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 +// CHECK-NEXT: store i32 1, ptr [[TMP24]], align 4 +// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 // CHECK-NEXT: store i32 1, ptr [[TMP25]], align 4 -// CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 -// CHECK-NEXT: store i32 1, ptr [[TMP26]], align 4 -// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 +// CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[TMP22]], ptr [[TMP26]], align 8 +// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 // CHECK-NEXT: store ptr [[TMP23]], ptr [[TMP27]], align 8 -// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 -// CHECK-NEXT: store ptr [[TMP24]], ptr [[TMP28]], align 8 -// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 -// CHECK-NEXT: store ptr @.offload_sizes.25, ptr [[TMP29]], align 8 -// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 -// CHECK-NEXT: store ptr @.offload_maptypes.26, ptr [[TMP30]], align 8 -// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 +// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 +// CHECK-NEXT: store ptr @.offload_sizes.25, ptr [[TMP28]], align 8 +// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 +// CHECK-NEXT: store ptr @.offload_maptypes.26, ptr [[TMP29]], align 8 +// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 +// CHECK-NEXT: store ptr null, ptr [[TMP30]], align 8 +// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 // CHECK-NEXT: store ptr null, ptr [[TMP31]], align 8 -// CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 -// CHECK-NEXT: store ptr null, ptr [[TMP32]], align 8 -// CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 -// CHECK-NEXT: store i64 0, ptr [[TMP33]], align 8 -// CHECK-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPiET_S1__l125.region_id, ptr [[KERNEL_ARGS4]]) -// CHECK-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] +// CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 +// CHECK-NEXT: store i64 0, ptr [[TMP32]], align 8 +// CHECK-NEXT: [[TMP33:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPiET_S1__l125.region_id, ptr [[KERNEL_ARGS4]]) +// CHECK-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK-NEXT: br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] // CHECK: omp_offload.failed5: // CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPiET_S1__l125(ptr [[TMP18]]) #[[ATTR5]] // CHECK-NEXT: br label [[OMP_OFFLOAD_CONT6]] // CHECK: omp_offload.cont6: -// CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[AA]], ptr [[TMP35]], align 8 +// CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 // CHECK-NEXT: store ptr [[AA]], ptr [[TMP36]], align 8 -// CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[AA]], ptr [[TMP37]], align 8 -// CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 0 -// CHECK-NEXT: store ptr null, ptr [[TMP38]], align 8 -// CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 -// CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 +// CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 0 +// CHECK-NEXT: store ptr null, ptr [[TMP37]], align 8 +// CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 // CHECK-NEXT: [[KERNEL_ARGS10:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0 +// CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0 +// CHECK-NEXT: store i32 1, ptr [[TMP40]], align 4 +// CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1 // CHECK-NEXT: store i32 1, ptr [[TMP41]], align 4 -// CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1 -// CHECK-NEXT: store i32 1, ptr [[TMP42]], align 4 -// CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2 +// CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[TMP38]], ptr [[TMP42]], align 8 +// CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 3 // CHECK-NEXT: store ptr [[TMP39]], ptr [[TMP43]], align 8 -// CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 3 -// CHECK-NEXT: store ptr [[TMP40]], ptr [[TMP44]], align 8 -// CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 4 -// CHECK-NEXT: store ptr @.offload_sizes.27, ptr [[TMP45]], align 8 -// CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 5 -// CHECK-NEXT: store ptr @.offload_maptypes.28, ptr [[TMP46]], align 8 -// CHECK-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 6 +// CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 4 +// CHECK-NEXT: store ptr @.offload_sizes.27, ptr [[TMP44]], align 8 +// CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 5 +// CHECK-NEXT: store ptr @.offload_maptypes.28, ptr [[TMP45]], align 8 +// CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 6 +// CHECK-NEXT: store ptr null, ptr [[TMP46]], align 8 +// CHECK-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 7 // CHECK-NEXT: store ptr null, ptr [[TMP47]], align 8 -// CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 7 -// CHECK-NEXT: store ptr null, ptr [[TMP48]], align 8 -// CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 8 -// CHECK-NEXT: store i64 0, ptr [[TMP49]], align 8 -// CHECK-NEXT: [[TMP50:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPiET_S1__l127.region_id, ptr [[KERNEL_ARGS10]]) -// CHECK-NEXT: [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0 -// CHECK-NEXT: br i1 [[TMP51]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]] +// CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 8 +// CHECK-NEXT: store i64 0, ptr [[TMP48]], align 8 +// CHECK-NEXT: [[TMP49:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPiET_S1__l127.region_id, ptr [[KERNEL_ARGS10]]) +// CHECK-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 +// CHECK-NEXT: br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]] // CHECK: omp_offload.failed11: // CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPiET_S1__l127(ptr [[AA]]) #[[ATTR5]] // CHECK-NEXT: br label [[OMP_OFFLOAD_CONT12]] // CHECK: omp_offload.cont12: -// CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 +// CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[H]], ptr [[TMP51]], align 8 +// CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 // CHECK-NEXT: store ptr [[H]], ptr [[TMP52]], align 8 -// CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[H]], ptr [[TMP53]], align 8 -// CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS15]], i64 0, i64 0 -// CHECK-NEXT: store ptr null, ptr [[TMP54]], align 8 -// CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 -// CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 +// CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS15]], i64 0, i64 0 +// CHECK-NEXT: store ptr null, ptr [[TMP53]], align 8 +// CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 +// CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 // CHECK-NEXT: [[KERNEL_ARGS16:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0 +// CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0 +// CHECK-NEXT: store i32 1, ptr [[TMP56]], align 4 +// CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1 // CHECK-NEXT: store i32 1, ptr [[TMP57]], align 4 -// CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1 -// CHECK-NEXT: store i32 1, ptr [[TMP58]], align 4 -// CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 2 +// CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[TMP54]], ptr [[TMP58]], align 8 +// CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 3 // CHECK-NEXT: store ptr [[TMP55]], ptr [[TMP59]], align 8 -// CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 3 -// CHECK-NEXT: store ptr [[TMP56]], ptr [[TMP60]], align 8 -// CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 4 -// CHECK-NEXT: store ptr @.offload_sizes.29, ptr [[TMP61]], align 8 -// CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 5 -// CHECK-NEXT: store ptr @.offload_maptypes.30, ptr [[TMP62]], align 8 -// CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 6 +// CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 4 +// CHECK-NEXT: store ptr @.offload_sizes.29, ptr [[TMP60]], align 8 +// CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 5 +// CHECK-NEXT: store ptr @.offload_maptypes.30, ptr [[TMP61]], align 8 +// CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 6 +// CHECK-NEXT: store ptr null, ptr [[TMP62]], align 8 +// CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 7 // CHECK-NEXT: store ptr null, ptr [[TMP63]], align 8 -// CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 7 -// CHECK-NEXT: store ptr null, ptr [[TMP64]], align 8 -// CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 8 -// CHECK-NEXT: store i64 0, ptr [[TMP65]], align 8 -// CHECK-NEXT: [[TMP66:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPiET_S1__l129.region_id, ptr [[KERNEL_ARGS16]]) -// CHECK-NEXT: [[TMP67:%.*]] = icmp ne i32 [[TMP66]], 0 -// CHECK-NEXT: br i1 [[TMP67]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]] +// CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 8 +// CHECK-NEXT: store i64 0, ptr [[TMP64]], align 8 +// CHECK-NEXT: [[TMP65:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPiET_S1__l129.region_id, ptr [[KERNEL_ARGS16]]) +// CHECK-NEXT: [[TMP66:%.*]] = icmp ne i32 [[TMP65]], 0 +// CHECK-NEXT: br i1 [[TMP66]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]] // CHECK: omp_offload.failed17: // CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPiET_S1__l129(ptr [[H]]) #[[ATTR5]] // CHECK-NEXT: br label [[OMP_OFFLOAD_CONT18]] Index: clang/test/OpenMP/target_parallel_debug_codegen.cpp =================================================================== --- clang/test/OpenMP/target_parallel_debug_codegen.cpp +++ clang/test/OpenMP/target_parallel_debug_codegen.cpp @@ -95,7 +95,7 @@ // CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG47]] // CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG47]] // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG47]] -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true), !dbg [[DBG47]] +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false), !dbg [[DBG47]] // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1, !dbg [[DBG47]] // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG47]] // CHECK1: user_code.entry: @@ -105,15 +105,15 @@ // CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG48]] // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG48]] // CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8, !dbg [[DBG48]] -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG48]] -// CHECK1-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP11]] to ptr, !dbg [[DBG48]] -// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8, !dbg [[DBG48]] -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG48]] -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP16]], align 8, !dbg [[DBG48]] -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG48]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP18]], align 8, !dbg [[DBG48]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG48]] +// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP11]] to ptr, !dbg [[DBG48]] +// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !dbg [[DBG48]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG48]] +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP15]], align 8, !dbg [[DBG48]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG48]] +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP16]], align 8, !dbg [[DBG48]] // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP9]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG48]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB5:[0-9]+]], i8 2, i1 true), !dbg [[DBG49:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB5:[0-9]+]], i8 2), !dbg [[DBG49:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG51:![0-9]+]] // CHECK1: worker.exit: // CHECK1-NEXT: ret void, !dbg [[DBG47]] @@ -176,36 +176,36 @@ // CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG83]] // CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG84:![0-9]+]] // CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B3]], i64 0, i64 0, !dbg [[DBG85:![0-9]+]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG86:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64, !dbg [[DBG85]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG86:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP8]] to i64, !dbg [[DBG85]] // CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX8]], i64 0, i64 [[IDXPROM]], !dbg [[DBG85]] // CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX9]], align 4, !dbg [[DBG87:![0-9]+]] // CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG88:![0-9]+]] // CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX10]], i64 0, i64 0, !dbg [[DBG88]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG89:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP11]] to i64, !dbg [[DBG88]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG89:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP9]] to i64, !dbg [[DBG88]] // CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[IDXPROM12]], !dbg [[DBG88]] // CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX13]], align 4, !dbg [[DBG90:![0-9]+]] // CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG91:![0-9]+]] // CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX14]], i64 0, i64 0, !dbg [[DBG91]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG92:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP12]] to i64, !dbg [[DBG91]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG92:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP10]] to i64, !dbg [[DBG91]] // CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX15]], i64 0, i64 [[IDXPROM16]], !dbg [[DBG91]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX17]], align 4, !dbg [[DBG91]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX17]], align 4, !dbg [[DBG91]] // CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B3]], i64 0, i64 0, !dbg [[DBG93:![0-9]+]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG94:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP14]] to i64, !dbg [[DBG93]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG94:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP12]] to i64, !dbg [[DBG93]] // CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG93]] -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX20]], align 4, !dbg [[DBG95:![0-9]+]] +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[ARRAYIDX20]], align 4, !dbg [[DBG95:![0-9]+]] // CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B3]], i64 0, i64 0, !dbg [[DBG96:![0-9]+]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG97:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP15]] to i64, !dbg [[DBG96]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG97:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG96]] // CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG96]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX23]], align 4, !dbg [[DBG96]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP7]], align 1, !dbg [[DBG98:![0-9]+]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP17]] to i1, !dbg [[DBG98]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX23]], align 4, !dbg [[DBG96]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i8, ptr [[TMP7]], align 1, !dbg [[DBG98:![0-9]+]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP15]] to i1, !dbg [[DBG98]] // CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG98]] -// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP16]], !dbg [[DBG98]] +// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP14]], !dbg [[DBG98]] // CHECK1-NEXT: [[TOBOOL24:%.*]] = icmp ne i32 [[OR]], 0, !dbg [[DBG98]] // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL24]] to i8, !dbg [[DBG98]] // CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP7]], align 1, !dbg [[DBG98]] @@ -308,7 +308,7 @@ // CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG137]] // CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG137]] // CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG137]] -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB7:[0-9]+]], i8 2, i1 false, i1 true), !dbg [[DBG137]] +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB7:[0-9]+]], i8 2, i1 false), !dbg [[DBG137]] // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG137]] // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG137]] // CHECK1: user_code.entry: @@ -318,15 +318,15 @@ // CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG138]] // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG138]] // CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP13]], align 8, !dbg [[DBG138]] -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG138]] -// CHECK1-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP12]] to ptr, !dbg [[DBG138]] -// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8, !dbg [[DBG138]] -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG138]] -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP17]], align 8, !dbg [[DBG138]] -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG138]] -// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP19]], align 8, !dbg [[DBG138]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG138]] +// CHECK1-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP12]] to ptr, !dbg [[DBG138]] +// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8, !dbg [[DBG138]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG138]] +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP16]], align 8, !dbg [[DBG138]] +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG138]] +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP17]], align 8, !dbg [[DBG138]] // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB9]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG138]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB11:[0-9]+]], i8 2, i1 true), !dbg [[DBG139:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB11:[0-9]+]], i8 2), !dbg [[DBG139:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG141:![0-9]+]] // CHECK1: worker.exit: // CHECK1-NEXT: ret void, !dbg [[DBG137]] @@ -517,21 +517,21 @@ // CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG212]] // CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG212]] // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB13:[0-9]+]], i8 2, i1 false, i1 true), !dbg [[DBG212]] +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB13:[0-9]+]], i8 2, i1 false), !dbg [[DBG212]] // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG212]] // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG212]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15:[0-9]+]]) // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG213:![0-9]+]] // CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG213]] -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP16]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG213]] -// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP18]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG213]] -// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP20]], align 8, !dbg [[DBG213]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG213]] +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP15]], align 8, !dbg [[DBG213]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG213]] +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG213]] +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG213]] +// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG213]] // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB15]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG213]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB17:[0-9]+]], i8 2, i1 true), !dbg [[DBG214:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB17:[0-9]+]], i8 2), !dbg [[DBG214:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG216:![0-9]+]] // CHECK1: worker.exit: // CHECK1-NEXT: ret void, !dbg [[DBG212]] Index: clang/test/OpenMP/target_parallel_for_debug_codegen.cpp =================================================================== --- clang/test/OpenMP/target_parallel_for_debug_codegen.cpp +++ clang/test/OpenMP/target_parallel_for_debug_codegen.cpp @@ -89,7 +89,7 @@ // CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG41]] // CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG41]] // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG41]] -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true), !dbg [[DBG41]] +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false), !dbg [[DBG41]] // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1, !dbg [[DBG41]] // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG41]] // CHECK1: user_code.entry: @@ -99,18 +99,18 @@ // CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG42]] // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG42]] // CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8, !dbg [[DBG42]] -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG42]] -// CHECK1-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP11]] to ptr, !dbg [[DBG42]] -// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8, !dbg [[DBG42]] -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG42]] -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP16]], align 8, !dbg [[DBG42]] -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG42]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP18]], align 8, !dbg [[DBG42]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !dbg [[DBG43:![0-9]+]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP19]] to i1, !dbg [[DBG43]] -// CHECK1-NEXT: [[TMP20:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG42]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB6]], i32 [[TMP9]], i32 [[TMP20]], i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG42]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB8:[0-9]+]], i8 2, i1 true), !dbg [[DBG45:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP11]] to ptr, !dbg [[DBG42]] +// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG42]] +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP15]], align 8, !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG42]] +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP16]], align 8, !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !dbg [[DBG43:![0-9]+]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP17]] to i1, !dbg [[DBG43]] +// CHECK1-NEXT: [[TMP18:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG42]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB6]], i32 [[TMP9]], i32 [[TMP18]], i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG42]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB8:[0-9]+]], i8 2), !dbg [[DBG45:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG46:![0-9]+]] // CHECK1: worker.exit: // CHECK1-NEXT: ret void, !dbg [[DBG41]] @@ -175,38 +175,38 @@ // CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B4]], metadata [[META72:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B4]], ptr align 4 [[TMP4]], i64 400, i1 false), !dbg [[DBG65]] // CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META73:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG65]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !dbg [[DBG65]] -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP11]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG74:![0-9]+]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG65]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !dbg [[DBG65]] +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG74:![0-9]+]] // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG65]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 9, !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9, !dbg [[DBG68]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG68]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG68]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]] // CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG68]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ], !dbg [[DBG68]] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ], !dbg [[DBG68]] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]], !dbg [[DBG65]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG68]] +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]], !dbg [[DBG65]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG65]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG65]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]], !dbg [[DBG65]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]] +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]], !dbg [[DBG65]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG65]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1, !dbg [[DBG75:![0-9]+]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1, !dbg [[DBG75:![0-9]+]] // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG75]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG75]] // CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META76:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79:![0-9]+]] @@ -224,36 +224,36 @@ // CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG87]] // CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG88:![0-9]+]] // CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG89:![0-9]+]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG90:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG89]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG90:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64, !dbg [[DBG89]] // CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[IDXPROM]], !dbg [[DBG89]] // CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX12]], align 4, !dbg [[DBG91:![0-9]+]] // CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG92:![0-9]+]] // CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG92]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG93:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP21]] to i64, !dbg [[DBG92]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG93:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG92]] // CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG92]] // CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX16]], align 4, !dbg [[DBG94:![0-9]+]] // CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG95:![0-9]+]] // CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX17]], i64 0, i64 0, !dbg [[DBG95]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG96:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG95]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG96:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG95]] // CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG95]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4, !dbg [[DBG95]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4, !dbg [[DBG95]] // CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG97:![0-9]+]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG98:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP24]] to i64, !dbg [[DBG97]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG98:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG97]] // CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG97]] -// CHECK1-NEXT: store i32 [[TMP23]], ptr [[ARRAYIDX23]], align 4, !dbg [[DBG99:![0-9]+]] +// CHECK1-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX23]], align 4, !dbg [[DBG99:![0-9]+]] // CHECK1-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG100:![0-9]+]] -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG101:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM25:%.*]] = sext i32 [[TMP25]] to i64, !dbg [[DBG100]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG101:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM25:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG100]] // CHECK1-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX24]], i64 0, i64 [[IDXPROM25]], !dbg [[DBG100]] -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[ARRAYIDX26]], align 4, !dbg [[DBG100]] -// CHECK1-NEXT: [[TMP27:%.*]] = load i8, ptr [[TMP7]], align 1, !dbg [[DBG102:![0-9]+]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP27]] to i1, !dbg [[DBG102]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX26]], align 4, !dbg [[DBG100]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i8, ptr [[TMP7]], align 1, !dbg [[DBG102:![0-9]+]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP25]] to i1, !dbg [[DBG102]] // CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG102]] -// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP26]], !dbg [[DBG102]] +// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP24]], !dbg [[DBG102]] // CHECK1-NEXT: [[TOBOOL27:%.*]] = icmp ne i32 [[OR]], 0, !dbg [[DBG102]] // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL27]] to i8, !dbg [[DBG102]] // CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP7]], align 1, !dbg [[DBG102]] @@ -261,24 +261,24 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG74]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP28]], 1, !dbg [[DBG65]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]] +// CHECK1-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP26]], 1, !dbg [[DBG65]] // CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG65]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG74]], !llvm.loop [[LOOP104:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG74]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP29]], [[TMP30]], !dbg [[DBG65]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG68]] +// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP27]], [[TMP28]], !dbg [[DBG65]] // CHECK1-NEXT: store i32 [[ADD29]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG65]] -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[TMP31]], [[TMP32]], !dbg [[DBG65]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG68]] +// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[TMP29]], [[TMP30]], !dbg [[DBG65]] // CHECK1-NEXT: store i32 [[ADD30]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG65]] // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG74]], !llvm.loop [[LOOP106:![0-9]+]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB5:[0-9]+]], i32 [[TMP11]]), !dbg [[DBG105:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB5:[0-9]+]], i32 [[TMP9]]), !dbg [[DBG105:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG107:![0-9]+]] // // @@ -383,7 +383,7 @@ // CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG146]] // CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG146]] // CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG146]] -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB10:[0-9]+]], i8 2, i1 false, i1 true), !dbg [[DBG146]] +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB10:[0-9]+]], i8 2, i1 false), !dbg [[DBG146]] // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG146]] // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG146]] // CHECK1: user_code.entry: @@ -393,15 +393,15 @@ // CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG147]] // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG147]] // CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP13]], align 8, !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP12]] to ptr, !dbg [[DBG147]] -// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8, !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG147]] -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP17]], align 8, !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG147]] -// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP19]], align 8, !dbg [[DBG147]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG147]] +// CHECK1-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP12]] to ptr, !dbg [[DBG147]] +// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8, !dbg [[DBG147]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG147]] +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP16]], align 8, !dbg [[DBG147]] +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG147]] +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP17]], align 8, !dbg [[DBG147]] // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB15]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG147]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB17:[0-9]+]], i8 2, i1 true), !dbg [[DBG148:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB17:[0-9]+]], i8 2), !dbg [[DBG148:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG150:![0-9]+]] // CHECK1: worker.exit: // CHECK1-NEXT: ret void, !dbg [[DBG146]] @@ -665,21 +665,21 @@ // CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG236]] // CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG236]] // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB19:[0-9]+]], i8 2, i1 false, i1 true), !dbg [[DBG236]] +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB19:[0-9]+]], i8 2, i1 false), !dbg [[DBG236]] // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG236]] // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG236]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB24:[0-9]+]]) // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG237:![0-9]+]] // CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG237]] -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG237]] -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP16]], align 8, !dbg [[DBG237]] -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG237]] -// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP18]], align 8, !dbg [[DBG237]] -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG237]] -// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP20]], align 8, !dbg [[DBG237]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG237]] +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP15]], align 8, !dbg [[DBG237]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG237]] +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG237]] +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG237]] +// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG237]] // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB24]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG237]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB26:[0-9]+]], i8 2, i1 true), !dbg [[DBG238:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB26:[0-9]+]], i8 2), !dbg [[DBG238:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG240:![0-9]+]] // CHECK1: worker.exit: // CHECK1-NEXT: ret void, !dbg [[DBG236]] Index: clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp =================================================================== --- clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp +++ clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp @@ -138,198 +138,198 @@ // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP30]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 4, i1 false) // CHECK1-NEXT: [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds ptr, ptr [[TMP33]], i64 0 -// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP34]], i64 0 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = sext i32 [[TMP35]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP36]] -// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr [[TMP37]], i64 9 -// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i64 [[LB_ADD_LEN10]] -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[TMP39]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[ARRAYIDX12]] to i64 -// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[ARRAYIDX9]] to i64 -// CHECK1-NEXT: [[TMP42:%.*]] = sub i64 [[TMP40]], [[TMP41]] -// CHECK1-NEXT: [[TMP43:%.*]] = sdiv exact i64 [[TMP42]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP44:%.*]] = add nuw i64 [[TMP43]], 1 -// CHECK1-NEXT: [[TMP45:%.*]] = mul nuw i64 [[TMP44]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP45]], ptr [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 6 -// CHECK1-NEXT: store i32 1, ptr [[TMP50]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds ptr, ptr [[TMP30]], i64 0 +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[ARRAYIDX8]], align 8 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP31]], i64 0 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = sext i32 [[TMP32]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP33]] +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr [[TMP34]], i64 9 +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[ARRAYIDX11]], align 8 +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[LB_ADD_LEN10]] +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = ptrtoint ptr [[ARRAYIDX12]] to i64 +// CHECK1-NEXT: [[TMP38:%.*]] = ptrtoint ptr [[ARRAYIDX9]] to i64 +// CHECK1-NEXT: [[TMP39:%.*]] = sub i64 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: [[TMP40:%.*]] = sdiv exact i64 [[TMP39]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP41:%.*]] = add nuw i64 [[TMP40]], 1 +// CHECK1-NEXT: [[TMP42:%.*]] = mul nuw i64 [[TMP41]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP42]], ptr [[TMP43]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP44]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP46]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 6 +// CHECK1-NEXT: store i32 1, ptr [[TMP47]], align 8 +// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP49]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) +// CHECK1-NEXT: store ptr [[TMP50]], ptr [[DOTTASK_RED_]], align 8 // CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 -// CHECK1-NEXT: [[TMP54:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP52]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) -// CHECK1-NEXT: store ptr [[TMP54]], ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: [[TMP55:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[TMP55]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP56]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP57]], 9 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP52]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP53:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP53]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP58]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP54]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP59]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP55]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i64 [[TMP60]], [[TMP61]] +// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i64 [[TMP56]], [[TMP57]] // CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP62]], 1 +// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP58]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL]] // CHECK1-NEXT: store i64 [[ADD]], ptr [[I]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP63]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP64]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP66:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP66]], ptr [[TMP65]], align 8 -// CHECK1-NEXT: [[TMP67:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[TMP67]], align 4 -// CHECK1-NEXT: [[TMP69:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP68]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) -// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP69]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP71]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP73:%.*]] = load ptr, ptr [[TMP72]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP73]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP69]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP75]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP77:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: store ptr [[TMP77]], ptr [[TMP76]], align 8 -// CHECK1-NEXT: [[TMP78:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP79:%.*]] = load i32, ptr [[TMP78]], align 4 -// CHECK1-NEXT: [[TMP80:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP79]], ptr [[TMP69]]) +// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP59]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP60]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP62:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK1-NEXT: store ptr [[TMP62]], ptr [[TMP61]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[TMP63]], align 4 +// CHECK1-NEXT: [[TMP65:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP64]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) +// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP65]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP66]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP68:%.*]] = load ptr, ptr [[TMP67]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP68]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP65]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP69]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP71:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: store ptr [[TMP71]], ptr [[TMP70]], align 8 +// CHECK1-NEXT: [[TMP72:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP73:%.*]] = load i32, ptr [[TMP72]], align 4 +// CHECK1-NEXT: [[TMP74:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP73]], ptr [[TMP65]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP81:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP81]], 1 +// CHECK1-NEXT: [[TMP75:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP75]], 1 // CHECK1-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP82:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP83:%.*]] = load i32, ptr [[TMP82]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP83]]) +// CHECK1-NEXT: [[TMP76:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP77:%.*]] = load i32, ptr [[TMP76]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP77]]) +// CHECK1-NEXT: [[TMP78:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP79:%.*]] = load i32, ptr [[TMP78]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP79]], i32 1) +// CHECK1-NEXT: [[TMP80:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP80]], align 8 +// CHECK1-NEXT: [[TMP81:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP81]], align 8 +// CHECK1-NEXT: [[TMP82:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP83:%.*]] = inttoptr i64 [[TMP11]] to ptr +// CHECK1-NEXT: store ptr [[TMP83]], ptr [[TMP82]], align 8 // CHECK1-NEXT: [[TMP84:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP85:%.*]] = load i32, ptr [[TMP84]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP85]], i32 1) -// CHECK1-NEXT: [[TMP86:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP86]], align 8 -// CHECK1-NEXT: [[TMP88:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP88]], align 8 -// CHECK1-NEXT: [[TMP89:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP90:%.*]] = inttoptr i64 [[TMP11]] to ptr -// CHECK1-NEXT: store ptr [[TMP90]], ptr [[TMP89]], align 8 -// CHECK1-NEXT: [[TMP91:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP92:%.*]] = load i32, ptr [[TMP91]], align 4 -// CHECK1-NEXT: [[TMP94:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP92]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP94]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP86:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP85]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP86]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP95:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP96:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP95]], [[TMP96]] +// CHECK1-NEXT: [[TMP87:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP88:%.*]] = load i32, ptr [[ARGC1]], align 4 +// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP87]], [[TMP88]] // CHECK1-NEXT: store i32 [[ADD15]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP97:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP97]] +// CHECK1-NEXT: [[TMP89:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP89]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE22:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP98:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP98]] to i32 -// CHECK1-NEXT: [[TMP99:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP99]] to i32 +// CHECK1-NEXT: [[TMP90:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP90]] to i32 +// CHECK1-NEXT: [[TMP91:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP91]] to i32 // CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV]], [[CONV17]] // CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 // CHECK1-NEXT: store i8 [[CONV19]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP97]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP89]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done22: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP92]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP85]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP100:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP101:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP100]] monotonic, align 4 -// CHECK1-NEXT: [[TMP102:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY23:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP102]] +// CHECK1-NEXT: [[TMP92:%.*]] = load i32, ptr [[ARGC1]], align 4 +// CHECK1-NEXT: [[TMP93:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP92]] monotonic, align 4 +// CHECK1-NEXT: [[TMP94:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY23:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP94]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY23]], label [[OMP_ARRAYCPY_DONE36:%.*]], label [[OMP_ARRAYCPY_BODY24:%.*]] // CHECK1: omp.arraycpy.body24: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST25:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT34:%.*]], [[ATOMIC_EXIT:%.*]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST26:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT33:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP103:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV27:%.*]] = sext i8 [[TMP103]] to i32 +// CHECK1-NEXT: [[TMP95:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 +// CHECK1-NEXT: [[CONV27:%.*]] = sext i8 [[TMP95]] to i32 // CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP104:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY24]] ], [ [[TMP109:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP104]], ptr [[_TMP28]], align 1 -// CHECK1-NEXT: [[TMP105:%.*]] = load i8, ptr [[_TMP28]], align 1 -// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP105]] to i32 -// CHECK1-NEXT: [[TMP106:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV30:%.*]] = sext i8 [[TMP106]] to i32 +// CHECK1-NEXT: [[TMP96:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY24]] ], [ [[TMP101:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP96]], ptr [[_TMP28]], align 1 +// CHECK1-NEXT: [[TMP97:%.*]] = load i8, ptr [[_TMP28]], align 1 +// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP97]] to i32 +// CHECK1-NEXT: [[TMP98:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 +// CHECK1-NEXT: [[CONV30:%.*]] = sext i8 [[TMP98]] to i32 // CHECK1-NEXT: [[ADD31:%.*]] = add nsw i32 [[CONV29]], [[CONV30]] // CHECK1-NEXT: [[CONV32:%.*]] = trunc i32 [[ADD31]] to i8 // CHECK1-NEXT: store i8 [[CONV32]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP107:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP108:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i8 [[TMP104]], i8 [[TMP107]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP109]] = extractvalue { i8, i1 } [[TMP108]], 0 -// CHECK1-NEXT: [[TMP110:%.*]] = extractvalue { i8, i1 } [[TMP108]], 1 -// CHECK1-NEXT: br i1 [[TMP110]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP99:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP100:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i8 [[TMP96]], i8 [[TMP99]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP101]] = extractvalue { i8, i1 } [[TMP100]], 0 +// CHECK1-NEXT: [[TMP102:%.*]] = extractvalue { i8, i1 } [[TMP100]], 1 +// CHECK1-NEXT: br i1 [[TMP102]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT33]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT34]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE35:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT33]], [[TMP102]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE35:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT33]], [[TMP94]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_DONE36]], label [[OMP_ARRAYCPY_BODY24]] // CHECK1: omp.arraycpy.done36: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP111:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP111]]) +// CHECK1-NEXT: [[TMP103:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP103]]) // CHECK1-NEXT: ret void // // @@ -340,8 +340,8 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -352,12 +352,12 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -452,59 +452,59 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: store ptr [[TMP9]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: call void [[TMP13]](ptr [[TMP14]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 -// CHECK1-NEXT: [[TMP22:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], ptr [[TMP19]], ptr [[TMP18]]) -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP29]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP31]], i64 9 -// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP32]], i64 [[LB_ADD_LEN_I]] -// CHECK1-NEXT: [[TMP33:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 -// CHECK1-NEXT: [[TMP34:%.*]] = ptrtoint ptr [[TMP25]] to i64 -// CHECK1-NEXT: [[TMP35:%.*]] = sub i64 [[TMP33]], [[TMP34]] -// CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1 -// CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: store i64 [[TMP37]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 -// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], ptr [[TMP39]], ptr [[TMP25]]) -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint ptr [[TMP43]] to i64 -// CHECK1-NEXT: [[TMP45:%.*]] = ptrtoint ptr [[TMP25]] to i64 -// CHECK1-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]] -// CHECK1-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[TMP40]], i64 [[TMP47]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK1-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP15]], ptr [[TMP14]]) +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]] +// CHECK1-NEXT: [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 +// CHECK1-NEXT: [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]] +// CHECK1-NEXT: [[TMP31:%.*]] = sdiv exact i64 [[TMP30]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP32:%.*]] = add nuw i64 [[TMP31]], 1 +// CHECK1-NEXT: [[TMP33:%.*]] = mul nuw i64 [[TMP32]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: store i64 [[TMP32]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP34]], ptr [[TMP20]]) +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[TMP38]] to i64 +// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP41:%.*]] = sub i64 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: [[TMP42:%.*]] = sdiv exact i64 [[TMP41]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[TMP35]], i64 [[TMP42]] // CHECK1-NEXT: store ptr [[TMP4_I]], ptr [[TMP_I]], align 8, !noalias !12 -// CHECK1-NEXT: store ptr [[TMP48]], ptr [[TMP4_I]], align 8, !noalias !12 +// CHECK1-NEXT: store ptr [[TMP43]], ptr [[TMP4_I]], align 8, !noalias !12 // CHECK1-NEXT: ret i32 0 // // @@ -516,38 +516,38 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP15]], i64 [[TMP18]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP15]], [[TMP21]] +// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP11]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP11]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP22]] to i32 -// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP23]] to i32 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP18]] to i32 +// CHECK1-NEXT: [[TMP19:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP19]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[CONV2]] // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8 // CHECK1-NEXT: store i8 [[CONV4]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP21]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done5: // CHECK1-NEXT: ret void Index: clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp =================================================================== --- clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp +++ clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp @@ -127,155 +127,155 @@ // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP30]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 4, i1 false) // CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP33]], i64 0 -// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP34]], i64 0 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = sext i32 [[TMP35]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP36]] -// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP37]], i64 9 -// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i64 [[LB_ADD_LEN9]] -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP39]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[ARRAYIDX11]] to i64 -// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 -// CHECK1-NEXT: [[TMP42:%.*]] = sub i64 [[TMP40]], [[TMP41]] -// CHECK1-NEXT: [[TMP43:%.*]] = sdiv exact i64 [[TMP42]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP44:%.*]] = add nuw i64 [[TMP43]], 1 -// CHECK1-NEXT: [[TMP45:%.*]] = mul nuw i64 [[TMP44]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP45]], ptr [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 -// CHECK1-NEXT: store i32 1, ptr [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 -// CHECK1-NEXT: [[TMP54:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP52]], i32 0, i32 2, ptr [[DOTRD_INPUT_]]) -// CHECK1-NEXT: store ptr [[TMP54]], ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP55]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP56]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP58:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP58]], ptr [[TMP57]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[TMP59]], align 4 -// CHECK1-NEXT: [[TMP61:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP60]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) -// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP61]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP63]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[TMP64]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP65]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP61]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP67]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP69:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: store ptr [[TMP69]], ptr [[TMP68]], align 8 -// CHECK1-NEXT: [[TMP70:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP71:%.*]] = load i32, ptr [[TMP70]], align 4 -// CHECK1-NEXT: [[TMP72:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP71]], ptr [[TMP61]]) +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP30]], i64 0 +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP31]], i64 0 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = sext i32 [[TMP32]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP33]] +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP34]], i64 9 +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[LB_ADD_LEN9]] +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = ptrtoint ptr [[ARRAYIDX11]] to i64 +// CHECK1-NEXT: [[TMP38:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 +// CHECK1-NEXT: [[TMP39:%.*]] = sub i64 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: [[TMP40:%.*]] = sdiv exact i64 [[TMP39]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP41:%.*]] = add nuw i64 [[TMP40]], 1 +// CHECK1-NEXT: [[TMP42:%.*]] = mul nuw i64 [[TMP41]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP42]], ptr [[TMP43]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP44]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP46]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 +// CHECK1-NEXT: store i32 1, ptr [[TMP47]], align 8 +// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP49]], i32 0, i32 2, ptr [[DOTRD_INPUT_]]) +// CHECK1-NEXT: store ptr [[TMP50]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP51]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP52]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP54]], ptr [[TMP53]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[TMP55]], align 4 +// CHECK1-NEXT: [[TMP57:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP56]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) +// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP57]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP58]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP60:%.*]] = load ptr, ptr [[TMP59]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP60]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP57]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP61]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: store ptr [[TMP63]], ptr [[TMP62]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP65:%.*]] = load i32, ptr [[TMP64]], align 4 +// CHECK1-NEXT: [[TMP66:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP65]], ptr [[TMP57]]) +// CHECK1-NEXT: [[TMP67:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[TMP67]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP68]], i32 0) +// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP69]], align 8 +// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP70]], align 8 +// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP72:%.*]] = inttoptr i64 [[TMP11]] to ptr +// CHECK1-NEXT: store ptr [[TMP72]], ptr [[TMP71]], align 8 // CHECK1-NEXT: [[TMP73:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP73]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP74]], i32 0) -// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP75]], align 8 -// CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP77]], align 8 -// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP79:%.*]] = inttoptr i64 [[TMP11]] to ptr -// CHECK1-NEXT: store ptr [[TMP79]], ptr [[TMP78]], align 8 -// CHECK1-NEXT: [[TMP80:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP81:%.*]] = load i32, ptr [[TMP80]], align 4 -// CHECK1-NEXT: [[TMP83:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP81]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP83]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP75:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP74]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP75]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP84:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP85:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP84]], [[TMP85]] +// CHECK1-NEXT: [[TMP76:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP77:%.*]] = load i32, ptr [[ARGC1]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP76]], [[TMP77]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP86:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP86]] +// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP78]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST12:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP87:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP87]] to i32 -// CHECK1-NEXT: [[TMP88:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV13:%.*]] = sext i8 [[TMP88]] to i32 +// CHECK1-NEXT: [[TMP79:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP79]] to i32 +// CHECK1-NEXT: [[TMP80:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV13:%.*]] = sext i8 [[TMP80]] to i32 // CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV]], [[CONV13]] // CHECK1-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 // CHECK1-NEXT: store i8 [[CONV15]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP86]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP78]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done18: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP81]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP74]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP89:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP90:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP89]] monotonic, align 4 -// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY19:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP91]] +// CHECK1-NEXT: [[TMP81:%.*]] = load i32, ptr [[ARGC1]], align 4 +// CHECK1-NEXT: [[TMP82:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP81]] monotonic, align 4 +// CHECK1-NEXT: [[TMP83:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY19:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP83]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY19]], label [[OMP_ARRAYCPY_DONE32:%.*]], label [[OMP_ARRAYCPY_BODY20:%.*]] // CHECK1: omp.arraycpy.body20: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST21:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT30:%.*]], [[ATOMIC_EXIT:%.*]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST22:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT29:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP92:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1 -// CHECK1-NEXT: [[CONV23:%.*]] = sext i8 [[TMP92]] to i32 +// CHECK1-NEXT: [[TMP84:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1 +// CHECK1-NEXT: [[CONV23:%.*]] = sext i8 [[TMP84]] to i32 // CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP93:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY20]] ], [ [[TMP98:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP93]], ptr [[_TMP24]], align 1 -// CHECK1-NEXT: [[TMP94:%.*]] = load i8, ptr [[_TMP24]], align 1 -// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP94]] to i32 -// CHECK1-NEXT: [[TMP95:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1 -// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP95]] to i32 +// CHECK1-NEXT: [[TMP85:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY20]] ], [ [[TMP90:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP85]], ptr [[_TMP24]], align 1 +// CHECK1-NEXT: [[TMP86:%.*]] = load i8, ptr [[_TMP24]], align 1 +// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP86]] to i32 +// CHECK1-NEXT: [[TMP87:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1 +// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP87]] to i32 // CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[CONV25]], [[CONV26]] // CHECK1-NEXT: [[CONV28:%.*]] = trunc i32 [[ADD27]] to i8 // CHECK1-NEXT: store i8 [[CONV28]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP96:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP97:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i8 [[TMP93]], i8 [[TMP96]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP98]] = extractvalue { i8, i1 } [[TMP97]], 0 -// CHECK1-NEXT: [[TMP99:%.*]] = extractvalue { i8, i1 } [[TMP97]], 1 -// CHECK1-NEXT: br i1 [[TMP99]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP88:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP89:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i8 [[TMP85]], i8 [[TMP88]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP90]] = extractvalue { i8, i1 } [[TMP89]], 0 +// CHECK1-NEXT: [[TMP91:%.*]] = extractvalue { i8, i1 } [[TMP89]], 1 +// CHECK1-NEXT: br i1 [[TMP91]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT29]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT30]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE31:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT29]], [[TMP91]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE31:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT29]], [[TMP83]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_DONE32]], label [[OMP_ARRAYCPY_BODY20]] // CHECK1: omp.arraycpy.done32: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP100:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP100]]) +// CHECK1-NEXT: [[TMP92:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP92]]) // CHECK1-NEXT: ret void // // @@ -286,8 +286,8 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -298,12 +298,12 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -398,59 +398,59 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: store ptr [[TMP9]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: call void [[TMP13]](ptr [[TMP14]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 -// CHECK1-NEXT: [[TMP22:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], ptr [[TMP19]], ptr [[TMP18]]) -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP29]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP31]], i64 9 -// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP32]], i64 [[LB_ADD_LEN_I]] -// CHECK1-NEXT: [[TMP33:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 -// CHECK1-NEXT: [[TMP34:%.*]] = ptrtoint ptr [[TMP25]] to i64 -// CHECK1-NEXT: [[TMP35:%.*]] = sub i64 [[TMP33]], [[TMP34]] -// CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1 -// CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: store i64 [[TMP37]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 -// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], ptr [[TMP39]], ptr [[TMP25]]) -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint ptr [[TMP43]] to i64 -// CHECK1-NEXT: [[TMP45:%.*]] = ptrtoint ptr [[TMP25]] to i64 -// CHECK1-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]] -// CHECK1-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[TMP40]], i64 [[TMP47]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK1-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP15]], ptr [[TMP14]]) +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]] +// CHECK1-NEXT: [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 +// CHECK1-NEXT: [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]] +// CHECK1-NEXT: [[TMP31:%.*]] = sdiv exact i64 [[TMP30]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP32:%.*]] = add nuw i64 [[TMP31]], 1 +// CHECK1-NEXT: [[TMP33:%.*]] = mul nuw i64 [[TMP32]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: store i64 [[TMP32]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP34]], ptr [[TMP20]]) +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[TMP38]] to i64 +// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP41:%.*]] = sub i64 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: [[TMP42:%.*]] = sdiv exact i64 [[TMP41]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[TMP35]], i64 [[TMP42]] // CHECK1-NEXT: store ptr [[TMP4_I]], ptr [[TMP_I]], align 8, !noalias !12 -// CHECK1-NEXT: store ptr [[TMP48]], ptr [[TMP4_I]], align 8, !noalias !12 +// CHECK1-NEXT: store ptr [[TMP43]], ptr [[TMP4_I]], align 8, !noalias !12 // CHECK1-NEXT: ret i32 0 // // @@ -462,38 +462,38 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP15]], i64 [[TMP18]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP15]], [[TMP21]] +// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP11]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP11]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP22]] to i32 -// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP23]] to i32 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP18]] to i32 +// CHECK1-NEXT: [[TMP19:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP19]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[CONV2]] // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8 // CHECK1-NEXT: store i8 [[CONV4]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP21]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done5: // CHECK1-NEXT: ret void Index: clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp =================================================================== --- clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp +++ clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp @@ -116,34 +116,34 @@ // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[A]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 56088, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 56088, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -151,8 +151,8 @@ // CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A3]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 -// CHECK1-NEXT: ret i32 [[TMP18]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 +// CHECK1-NEXT: ret i32 [[TMP16]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28 @@ -276,34 +276,34 @@ // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[A]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 56088, ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 56088, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -311,8 +311,8 @@ // CHECK3-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A3]], i32 0, i32 0 // CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 -// CHECK3-NEXT: ret i32 [[TMP18]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 +// CHECK3-NEXT: ret i32 [[TMP16]] // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28 @@ -430,10 +430,10 @@ // CHECK9-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8 // CHECK9-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK9-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK9-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 @@ -458,91 +458,91 @@ // CHECK9-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP1]], [[TMP3]] // CHECK9-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes, i64 40, i1 false) -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 // CHECK9-NEXT: store ptr null, ptr [[TMP17]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP18]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP20]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK9-NEXT: store ptr null, ptr [[TMP22]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP23]], align 8 -// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP25]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP18]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP19]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK9-NEXT: store ptr null, ptr [[TMP20]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP21]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP22]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK9-NEXT: store ptr null, ptr [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 // CHECK9-NEXT: store ptr null, ptr [[TMP27]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP28]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP30]], align 8 -// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 -// CHECK9-NEXT: store ptr null, ptr [[TMP32]], align 8 -// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP33]], align 8 -// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP35]], align 8 -// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 -// CHECK9-NEXT: store i64 [[TMP11]], ptr [[TMP37]], align 8 -// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 -// CHECK9-NEXT: store ptr null, ptr [[TMP38]], align 8 -// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[N]], align 4 -// CHECK9-NEXT: store i32 [[TMP42]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[M]], align 4 -// CHECK9-NEXT: store i32 [[TMP43]], ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP32]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[CONV5:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP45]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV5]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_4]], align 8 -// CHECK9-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP46]], 1 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP35]], 1 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP47]], align 4 -// CHECK9-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 5, ptr [[TMP48]], align 4 -// CHECK9-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[TMP39]], ptr [[TMP49]], align 8 -// CHECK9-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK9-NEXT: store ptr [[TMP40]], ptr [[TMP50]], align 8 -// CHECK9-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr [[TMP41]], ptr [[TMP51]], align 8 -// CHECK9-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP52]], align 8 -// CHECK9-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK9-NEXT: store ptr null, ptr [[TMP53]], align 8 -// CHECK9-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP54]], align 8 -// CHECK9-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 [[ADD]], ptr [[TMP55]], align 8 -// CHECK9-NEXT: [[TMP56:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP57:%.*]] = icmp ne i32 [[TMP56]], 0 -// CHECK9-NEXT: br i1 [[TMP57]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP36]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 5, ptr [[TMP37]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP28]], ptr [[TMP38]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP29]], ptr [[TMP39]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP30]], ptr [[TMP40]], align 8 +// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP41]], align 8 +// CHECK9-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: store ptr null, ptr [[TMP42]], align 8 +// CHECK9-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK9-NEXT: store ptr null, ptr [[TMP43]], align 8 +// CHECK9-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 [[ADD]], ptr [[TMP44]], align 8 +// CHECK9-NEXT: [[TMP45:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK9-NEXT: br i1 [[TMP46]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80(i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP1]], i64 [[TMP3]], ptr [[VLA]]) #[[ATTR3:[0-9]+]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: -// CHECK9-NEXT: [[TMP58:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP58]]) +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP47]]) // CHECK9-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 -// CHECK9-NEXT: [[TMP59:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK9-NEXT: call void @llvm.stackrestore(ptr [[TMP59]]) -// CHECK9-NEXT: [[TMP60:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: ret i32 [[TMP60]] +// CHECK9-NEXT: [[TMP48:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK9-NEXT: call void @llvm.stackrestore(ptr [[TMP48]]) +// CHECK9-NEXT: [[TMP49:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK9-NEXT: ret i32 [[TMP49]] // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80 @@ -585,18 +585,18 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I13:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J14:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 @@ -610,30 +610,30 @@ // CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_4]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 -// CHECK9-NEXT: [[CONV10:%.*]] = sext i32 [[DIV9]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV7]], [[CONV10]] -// CHECK9-NEXT: [[SUB11:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB11]], ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] +// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 // CHECK9-NEXT: store i32 0, ptr [[J]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[CMP12:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 // CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 @@ -641,11 +641,11 @@ // CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) // CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK9-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: // CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 @@ -659,54 +659,54 @@ // CHECK9: omp.inner.for.cond: // CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] -// CHECK9-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP20]], 0 -// CHECK9-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 -// CHECK9-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] -// CHECK9-NEXT: [[CONV20:%.*]] = sext i32 [[MUL19]] to i64 -// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i64 [[TMP19]], [[CONV20]] -// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]] -// CHECK9-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK9-NEXT: store i32 [[CONV23]], ptr [[I13]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP20]], 0 +// CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK9-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] +// CHECK9-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 +// CHECK9-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP19]], [[CONV18]] +// CHECK9-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] +// CHECK9-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK9-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4 // CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP23]], 0 -// CHECK9-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 -// CHECK9-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] -// CHECK9-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 -// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP22]], [[CONV27]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK9-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 -// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] -// CHECK9-NEXT: [[CONV32:%.*]] = sext i32 [[MUL31]] to i64 -// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[DIV28]], [[CONV32]] -// CHECK9-NEXT: [[SUB34:%.*]] = sub nsw i64 [[TMP21]], [[MUL33]] -// CHECK9-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 -// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] -// CHECK9-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 -// CHECK9-NEXT: store i32 [[CONV37]], ptr [[J14]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[I13]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK9-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 +// CHECK9-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] +// CHECK9-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 +// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP22]], [[CONV25]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 +// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] +// CHECK9-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 +// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] +// CHECK9-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP21]], [[MUL31]] +// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 +// CHECK9-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] +// CHECK9-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 +// CHECK9-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[I11]], align 4 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 // CHECK9-NEXT: [[TMP26:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP1]] // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP26]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[J14]], align 4 -// CHECK9-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM38]] -// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX39]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[J12]], align 4 +// CHECK9-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM36]] +// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX37]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: // CHECK9-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[ADD40:%.*]] = add nsw i64 [[TMP28]], 1 -// CHECK9-NEXT: store i64 [[ADD40]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP28]], 1 +// CHECK9-NEXT: store i64 [[ADD38]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -732,34 +732,34 @@ // CHECK9-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: store ptr [[A]], ptr [[TMP0]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[A]], ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK9-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[A]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr @.offload_sizes.2, ptr [[TMP11]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK9-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP12]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK9-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 20, ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr @.offload_sizes.2, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK9-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 20, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK9-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67(ptr [[A]]) #[[ATTR3]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -910,91 +910,91 @@ // CHECK11-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4 // CHECK11-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes, i32 40, i1 false) -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP12]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 // CHECK11-NEXT: store ptr null, ptr [[TMP16]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP17]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP19]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK11-NEXT: store ptr null, ptr [[TMP21]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP22]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP24]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP18]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr null, ptr [[TMP19]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP20]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP21]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr null, ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK11-NEXT: store i64 [[TMP10]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 // CHECK11-NEXT: store ptr null, ptr [[TMP26]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP27]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP29]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr null, ptr [[TMP31]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP34]], align 4 -// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 -// CHECK11-NEXT: store i64 [[TMP10]], ptr [[TMP36]], align 4 -// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr null, ptr [[TMP37]], align 4 -// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[N]], align 4 -// CHECK11-NEXT: store i32 [[TMP41]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[M]], align 4 -// CHECK11-NEXT: store i32 [[TMP42]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP43]], 0 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP30]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 // CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] // CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 // CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK11-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP45]], 1 +// CHECK11-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP34]], 1 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP46]], align 4 -// CHECK11-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 5, ptr [[TMP47]], align 4 -// CHECK11-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[TMP38]], ptr [[TMP48]], align 4 -// CHECK11-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr [[TMP39]], ptr [[TMP49]], align 4 -// CHECK11-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr [[TMP40]], ptr [[TMP50]], align 4 -// CHECK11-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP51]], align 4 -// CHECK11-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK11-NEXT: store ptr null, ptr [[TMP52]], align 4 -// CHECK11-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP53]], align 4 -// CHECK11-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 [[ADD]], ptr [[TMP54]], align 8 -// CHECK11-NEXT: [[TMP55:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP56:%.*]] = icmp ne i32 [[TMP55]], 0 -// CHECK11-NEXT: br i1 [[TMP56]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 1, ptr [[TMP35]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 5, ptr [[TMP36]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP27]], ptr [[TMP37]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP28]], ptr [[TMP38]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP29]], ptr [[TMP39]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP40]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: store ptr null, ptr [[TMP41]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK11-NEXT: store ptr null, ptr [[TMP42]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 [[ADD]], ptr [[TMP43]], align 8 +// CHECK11-NEXT: [[TMP44:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK11-NEXT: br i1 [[TMP45]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80(i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP0]], i32 [[TMP1]], ptr [[VLA]]) #[[ATTR3:[0-9]+]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: -// CHECK11-NEXT: [[TMP57:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP57]]) +// CHECK11-NEXT: [[TMP46:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP46]]) // CHECK11-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 -// CHECK11-NEXT: [[TMP58:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 -// CHECK11-NEXT: call void @llvm.stackrestore(ptr [[TMP58]]) -// CHECK11-NEXT: [[TMP59:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: ret i32 [[TMP59]] +// CHECK11-NEXT: [[TMP47:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 +// CHECK11-NEXT: call void @llvm.stackrestore(ptr [[TMP47]]) +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK11-NEXT: ret i32 [[TMP48]] // // // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80 @@ -1182,34 +1182,34 @@ // CHECK11-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: store ptr [[A]], ptr [[TMP0]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: store ptr [[A]], ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK11-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[A]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr @.offload_sizes.2, ptr [[TMP11]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK11-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP12]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 20, ptr [[TMP15]], align 8 -// CHECK11-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK11-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr @.offload_sizes.2, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK11-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 20, ptr [[TMP13]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK11-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67(ptr [[A]]) #[[ATTR3]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] Index: clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp =================================================================== --- clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp +++ clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp @@ -121,34 +121,34 @@ // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[A]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 56088, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 56088, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -156,8 +156,8 @@ // CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A3]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 -// CHECK1-NEXT: ret i32 [[TMP18]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 +// CHECK1-NEXT: ret i32 [[TMP16]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28 @@ -356,34 +356,34 @@ // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[A]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 56088, ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 56088, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -391,8 +391,8 @@ // CHECK3-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A3]], i32 0, i32 0 // CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 -// CHECK3-NEXT: ret i32 [[TMP18]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 +// CHECK3-NEXT: ret i32 [[TMP16]] // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28 @@ -581,10 +581,10 @@ // CHECK9-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8 // CHECK9-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK9-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK9-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 @@ -609,91 +609,91 @@ // CHECK9-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP1]], [[TMP3]] // CHECK9-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes, i64 40, i1 false) -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 // CHECK9-NEXT: store ptr null, ptr [[TMP17]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP18]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP20]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK9-NEXT: store ptr null, ptr [[TMP22]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP23]], align 8 -// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP25]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP18]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP19]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK9-NEXT: store ptr null, ptr [[TMP20]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP21]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP22]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK9-NEXT: store ptr null, ptr [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 // CHECK9-NEXT: store ptr null, ptr [[TMP27]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP28]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP30]], align 8 -// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 -// CHECK9-NEXT: store ptr null, ptr [[TMP32]], align 8 -// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP33]], align 8 -// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP35]], align 8 -// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 -// CHECK9-NEXT: store i64 [[TMP11]], ptr [[TMP37]], align 8 -// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 -// CHECK9-NEXT: store ptr null, ptr [[TMP38]], align 8 -// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[N]], align 4 -// CHECK9-NEXT: store i32 [[TMP42]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[M]], align 4 -// CHECK9-NEXT: store i32 [[TMP43]], ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP32]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[CONV5:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP45]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV5]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_4]], align 8 -// CHECK9-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP46]], 1 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP35]], 1 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP47]], align 4 -// CHECK9-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 5, ptr [[TMP48]], align 4 -// CHECK9-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[TMP39]], ptr [[TMP49]], align 8 -// CHECK9-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK9-NEXT: store ptr [[TMP40]], ptr [[TMP50]], align 8 -// CHECK9-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr [[TMP41]], ptr [[TMP51]], align 8 -// CHECK9-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP52]], align 8 -// CHECK9-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK9-NEXT: store ptr null, ptr [[TMP53]], align 8 -// CHECK9-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP54]], align 8 -// CHECK9-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 [[ADD]], ptr [[TMP55]], align 8 -// CHECK9-NEXT: [[TMP56:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP57:%.*]] = icmp ne i32 [[TMP56]], 0 -// CHECK9-NEXT: br i1 [[TMP57]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP36]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 5, ptr [[TMP37]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP28]], ptr [[TMP38]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP29]], ptr [[TMP39]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP30]], ptr [[TMP40]], align 8 +// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP41]], align 8 +// CHECK9-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: store ptr null, ptr [[TMP42]], align 8 +// CHECK9-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK9-NEXT: store ptr null, ptr [[TMP43]], align 8 +// CHECK9-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 [[ADD]], ptr [[TMP44]], align 8 +// CHECK9-NEXT: [[TMP45:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK9-NEXT: br i1 [[TMP46]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81(i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP1]], i64 [[TMP3]], ptr [[VLA]]) #[[ATTR3:[0-9]+]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: -// CHECK9-NEXT: [[TMP58:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP58]]) +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP47]]) // CHECK9-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 -// CHECK9-NEXT: [[TMP59:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK9-NEXT: call void @llvm.stackrestore(ptr [[TMP59]]) -// CHECK9-NEXT: [[TMP60:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: ret i32 [[TMP60]] +// CHECK9-NEXT: [[TMP48:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK9-NEXT: call void @llvm.stackrestore(ptr [[TMP48]]) +// CHECK9-NEXT: [[TMP49:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK9-NEXT: ret i32 [[TMP49]] // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81 @@ -736,18 +736,18 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I13:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J14:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[M_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 @@ -763,30 +763,30 @@ // CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_4]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 -// CHECK9-NEXT: [[CONV10:%.*]] = sext i32 [[DIV9]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV7]], [[CONV10]] -// CHECK9-NEXT: [[SUB11:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB11]], ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] +// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 // CHECK9-NEXT: store i32 0, ptr [[J]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[CMP12:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 // CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 8 // CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 @@ -794,11 +794,11 @@ // CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) // CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK9-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: // CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 @@ -812,8 +812,8 @@ // CHECK9: omp.inner.for.cond: // CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] -// CHECK9-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 // CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 @@ -856,18 +856,18 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I13:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J14:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 @@ -883,30 +883,30 @@ // CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_4]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 -// CHECK9-NEXT: [[CONV10:%.*]] = sext i32 [[DIV9]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV7]], [[CONV10]] -// CHECK9-NEXT: [[SUB11:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB11]], ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] +// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 // CHECK9-NEXT: store i32 0, ptr [[J]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[CMP12:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 // CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 @@ -918,11 +918,11 @@ // CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) // CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK9-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: // CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 @@ -936,54 +936,54 @@ // CHECK9: omp.inner.for.cond: // CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK9-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] +// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP22]], 0 -// CHECK9-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 -// CHECK9-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] -// CHECK9-NEXT: [[CONV20:%.*]] = sext i32 [[MUL19]] to i64 -// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i64 [[TMP21]], [[CONV20]] -// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]] -// CHECK9-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK9-NEXT: store i32 [[CONV23]], ptr [[I13]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP22]], 0 +// CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK9-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] +// CHECK9-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 +// CHECK9-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP21]], [[CONV18]] +// CHECK9-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] +// CHECK9-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK9-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4 // CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK9-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 -// CHECK9-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] -// CHECK9-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 -// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP24]], [[CONV27]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP26]], 0 -// CHECK9-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 -// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] -// CHECK9-NEXT: [[CONV32:%.*]] = sext i32 [[MUL31]] to i64 -// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[DIV28]], [[CONV32]] -// CHECK9-NEXT: [[SUB34:%.*]] = sub nsw i64 [[TMP23]], [[MUL33]] -// CHECK9-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 -// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] -// CHECK9-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 -// CHECK9-NEXT: store i32 [[CONV37]], ptr [[J14]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I13]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK9-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 +// CHECK9-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] +// CHECK9-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 +// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP24]], [[CONV25]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 +// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] +// CHECK9-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 +// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] +// CHECK9-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP23]], [[MUL31]] +// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 +// CHECK9-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] +// CHECK9-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 +// CHECK9-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I11]], align 4 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 // CHECK9-NEXT: [[TMP28:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP1]] // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP28]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[J14]], align 4 -// CHECK9-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK9-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM38]] -// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX39]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[J12]], align 4 +// CHECK9-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK9-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM36]] +// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX37]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: // CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[ADD40:%.*]] = add nsw i64 [[TMP30]], 1 -// CHECK9-NEXT: store i64 [[ADD40]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP30]], 1 +// CHECK9-NEXT: store i64 [[ADD38]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1009,34 +1009,34 @@ // CHECK9-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: store ptr [[A]], ptr [[TMP0]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[A]], ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK9-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[A]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr @.offload_sizes.4, ptr [[TMP11]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK9-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP12]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK9-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 20, ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr @.offload_sizes.4, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK9-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 20, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK9-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68(ptr [[A]]) #[[ATTR3]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1262,91 +1262,91 @@ // CHECK11-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4 // CHECK11-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes, i32 40, i1 false) -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP12]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 // CHECK11-NEXT: store ptr null, ptr [[TMP16]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP17]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP19]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK11-NEXT: store ptr null, ptr [[TMP21]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP22]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP24]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP18]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr null, ptr [[TMP19]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP20]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP21]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr null, ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK11-NEXT: store i64 [[TMP10]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 // CHECK11-NEXT: store ptr null, ptr [[TMP26]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP27]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP29]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr null, ptr [[TMP31]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP34]], align 4 -// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 -// CHECK11-NEXT: store i64 [[TMP10]], ptr [[TMP36]], align 4 -// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr null, ptr [[TMP37]], align 4 -// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[N]], align 4 -// CHECK11-NEXT: store i32 [[TMP41]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[M]], align 4 -// CHECK11-NEXT: store i32 [[TMP42]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP43]], 0 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP30]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 // CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] // CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 // CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK11-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP45]], 1 +// CHECK11-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP34]], 1 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP46]], align 4 -// CHECK11-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 5, ptr [[TMP47]], align 4 -// CHECK11-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[TMP38]], ptr [[TMP48]], align 4 -// CHECK11-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr [[TMP39]], ptr [[TMP49]], align 4 -// CHECK11-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr [[TMP40]], ptr [[TMP50]], align 4 -// CHECK11-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP51]], align 4 -// CHECK11-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK11-NEXT: store ptr null, ptr [[TMP52]], align 4 -// CHECK11-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP53]], align 4 -// CHECK11-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 [[ADD]], ptr [[TMP54]], align 8 -// CHECK11-NEXT: [[TMP55:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP56:%.*]] = icmp ne i32 [[TMP55]], 0 -// CHECK11-NEXT: br i1 [[TMP56]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 1, ptr [[TMP35]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 5, ptr [[TMP36]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP27]], ptr [[TMP37]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP28]], ptr [[TMP38]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP29]], ptr [[TMP39]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP40]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: store ptr null, ptr [[TMP41]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK11-NEXT: store ptr null, ptr [[TMP42]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 [[ADD]], ptr [[TMP43]], align 8 +// CHECK11-NEXT: [[TMP44:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK11-NEXT: br i1 [[TMP45]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81(i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP0]], i32 [[TMP1]], ptr [[VLA]]) #[[ATTR3:[0-9]+]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: -// CHECK11-NEXT: [[TMP57:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP57]]) +// CHECK11-NEXT: [[TMP46:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP46]]) // CHECK11-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 -// CHECK11-NEXT: [[TMP58:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 -// CHECK11-NEXT: call void @llvm.stackrestore(ptr [[TMP58]]) -// CHECK11-NEXT: [[TMP59:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: ret i32 [[TMP59]] +// CHECK11-NEXT: [[TMP47:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 +// CHECK11-NEXT: call void @llvm.stackrestore(ptr [[TMP47]]) +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK11-NEXT: ret i32 [[TMP48]] // // // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81 @@ -1664,34 +1664,34 @@ // CHECK11-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: store ptr [[A]], ptr [[TMP0]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: store ptr [[A]], ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK11-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[A]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr @.offload_sizes.4, ptr [[TMP11]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK11-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP12]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 20, ptr [[TMP15]], align 8 -// CHECK11-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK11-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr @.offload_sizes.4, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK11-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 20, ptr [[TMP13]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK11-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68(ptr [[A]]) #[[ATTR3]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] Index: clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp =================================================================== --- clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp +++ clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp @@ -499,37 +499,37 @@ // CHECK1: omp_if.then: // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP22]] to i1 -// CHECK1-NEXT: [[TMP23:%.*]] = select i1 [[TOBOOL4]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP20]] to i1 +// CHECK1-NEXT: [[TMP21:%.*]] = select i1 [[TOBOOL4]], i32 0, i32 1 // CHECK1-NEXT: [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP24]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP25]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP20]], ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8 -// CHECK1-NEXT: store i64 100, ptr [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP23]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90.region_id, ptr [[KERNEL_ARGS6]]) -// CHECK1-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 -// CHECK1-NEXT: br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]] +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP22]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP23]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP18]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP19]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8 +// CHECK1-NEXT: store i64 100, ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP21]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90.region_id, ptr [[KERNEL_ARGS6]]) +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK1-NEXT: br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]] // CHECK1: omp_offload.failed7: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90(i64 [[TMP13]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT8]] @@ -539,8 +539,8 @@ // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90(i64 [[TMP13]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr @Arg, align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiEiT_(i32 noundef [[TMP35]]) +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr @Arg, align 4 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiEiT_(i32 noundef [[TMP33]]) // CHECK1-NEXT: ret i32 [[CALL]] // // @@ -1037,37 +1037,37 @@ // CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP21:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP21]] to i1 -// CHECK1-NEXT: [[TMP22:%.*]] = select i1 [[TOBOOL3]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP19]] to i1 +// CHECK1-NEXT: [[TMP20:%.*]] = select i1 [[TOBOOL3]], i32 0, i32 1 // CHECK1-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP23]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP24]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP19]], ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP20]], ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.16, ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.17, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8 -// CHECK1-NEXT: store i64 100, ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP22]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.region_id, ptr [[KERNEL_ARGS5]]) -// CHECK1-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK1-NEXT: br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP21]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP22]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP17]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP18]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.16, ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.17, ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8 +// CHECK1-NEXT: store i64 100, ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP20]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.region_id, ptr [[KERNEL_ARGS5]]) +// CHECK1-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK1-NEXT: br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]] // CHECK1: omp_offload.failed6: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68(i64 [[TMP13]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT7]] Index: clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp =================================================================== --- clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp +++ clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp @@ -167,22 +167,22 @@ // CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] Index: clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp =================================================================== --- clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp +++ clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp @@ -101,34 +101,34 @@ // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 2, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 2, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66(ptr @_ZZ4mainE5sivar) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -212,21 +212,21 @@ // CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 // CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -311,21 +311,21 @@ // CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[SIVAR2]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR2]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 // CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR2]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -339,15 +339,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -359,15 +359,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -382,36 +382,36 @@ // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.7, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 2, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.7, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 2, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(ptr [[T_VAR]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -494,21 +494,21 @@ // CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 // CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -593,21 +593,21 @@ // CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[T_VAR2]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 // CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -621,15 +621,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -641,15 +641,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -671,34 +671,34 @@ // CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 2, ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 2, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66(ptr @_ZZ4mainE5sivar) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -780,21 +780,21 @@ // CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 // CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -877,21 +877,21 @@ // CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 // CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -905,15 +905,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -925,15 +925,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -948,36 +948,36 @@ // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.7, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 2, ptr [[TMP16]], align 8 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.7, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 2, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(ptr [[T_VAR]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1058,21 +1058,21 @@ // CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 // CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -1155,21 +1155,21 @@ // CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 // CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -1183,15 +1183,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -1203,15 +1203,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -1307,21 +1307,21 @@ // CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK5-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK5-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.reduction.case1: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK5-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 // CHECK5-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.case2: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK5-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.default: // CHECK5-NEXT: ret void @@ -1410,21 +1410,21 @@ // CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK5-NEXT: store ptr [[SIVAR2]], ptr [[TMP15]], align 8 -// CHECK5-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK5-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.reduction.case1: -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR2]], align 4 +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK5-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 // CHECK5-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.case2: -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK5-NEXT: [[TMP22:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP21]] monotonic, align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR2]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP19]] monotonic, align 4 // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.default: // CHECK5-NEXT: ret void @@ -1438,15 +1438,15 @@ // CHECK5-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK5-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK5-NEXT: ret void // // @@ -1458,15 +1458,15 @@ // CHECK5-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK5-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK5-NEXT: ret void // // Index: clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp =================================================================== --- clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp +++ clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp @@ -133,178 +133,178 @@ // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP30]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 4, i1 false) // CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP33]], i64 0 -// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP34]], i64 0 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = sext i32 [[TMP35]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP36]] -// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP37]], i64 9 -// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i64 [[LB_ADD_LEN9]] -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP39]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[ARRAYIDX11]] to i64 -// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 -// CHECK1-NEXT: [[TMP42:%.*]] = sub i64 [[TMP40]], [[TMP41]] -// CHECK1-NEXT: [[TMP43:%.*]] = sdiv exact i64 [[TMP42]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP44:%.*]] = add nuw i64 [[TMP43]], 1 -// CHECK1-NEXT: [[TMP45:%.*]] = mul nuw i64 [[TMP44]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP45]], ptr [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 -// CHECK1-NEXT: store i32 1, ptr [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 -// CHECK1-NEXT: [[TMP54:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP52]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) -// CHECK1-NEXT: store ptr [[TMP54]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP30]], i64 0 +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP31]], i64 0 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = sext i32 [[TMP32]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP33]] +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP34]], i64 9 +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[LB_ADD_LEN9]] +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = ptrtoint ptr [[ARRAYIDX11]] to i64 +// CHECK1-NEXT: [[TMP38:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 +// CHECK1-NEXT: [[TMP39:%.*]] = sub i64 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: [[TMP40:%.*]] = sdiv exact i64 [[TMP39]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP41:%.*]] = add nuw i64 [[TMP40]], 1 +// CHECK1-NEXT: [[TMP42:%.*]] = mul nuw i64 [[TMP41]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP42]], ptr [[TMP43]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP44]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP46]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 +// CHECK1-NEXT: store i32 1, ptr [[TMP47]], align 8 +// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP49]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) +// CHECK1-NEXT: store ptr [[TMP50]], ptr [[DOTTASK_RED_]], align 8 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 // CHECK1-NEXT: store i64 9, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP55:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[TMP55]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP56]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP57]], 9 +// CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP52]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP53:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP53]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP58]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP54]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP59]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP55]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i64 [[TMP60]], [[TMP61]] +// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i64 [[TMP56]], [[TMP57]] // CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..3, i64 [[TMP62]], i64 [[TMP63]], ptr [[ARGC1]], ptr [[TMP64]]) +// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..3, i64 [[TMP58]], i64 [[TMP59]], ptr [[ARGC1]], ptr [[TMP60]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP65:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP66:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP65]], [[TMP66]] +// CHECK1-NEXT: [[TMP61:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP61]], [[TMP62]] // CHECK1-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP67:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[TMP67]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP68]]) -// CHECK1-NEXT: [[TMP69:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP70:%.*]] = load i32, ptr [[TMP69]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP70]], i32 1) -// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP71]], align 8 -// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP73]], align 8 -// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP75:%.*]] = inttoptr i64 [[TMP11]] to ptr -// CHECK1-NEXT: store ptr [[TMP75]], ptr [[TMP74]], align 8 -// CHECK1-NEXT: [[TMP76:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP77:%.*]] = load i32, ptr [[TMP76]], align 4 -// CHECK1-NEXT: [[TMP79:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB4:[0-9]+]], i32 [[TMP77]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.9, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP79]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[TMP63]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP64]]) +// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP66:%.*]] = load i32, ptr [[TMP65]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP66]], i32 1) +// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP67]], align 8 +// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP68]], align 8 +// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP70:%.*]] = inttoptr i64 [[TMP11]] to ptr +// CHECK1-NEXT: store ptr [[TMP70]], ptr [[TMP69]], align 8 +// CHECK1-NEXT: [[TMP71:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP72:%.*]] = load i32, ptr [[TMP71]], align 4 +// CHECK1-NEXT: [[TMP73:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB4:[0-9]+]], i32 [[TMP72]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.9, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP73]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP80:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP81:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP80]], [[TMP81]] +// CHECK1-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP75:%.*]] = load i32, ptr [[ARGC1]], align 4 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP74]], [[TMP75]] // CHECK1-NEXT: store i32 [[ADD14]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP82:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP82]] +// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP76]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP83:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP83]] to i32 -// CHECK1-NEXT: [[TMP84:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV16:%.*]] = sext i8 [[TMP84]] to i32 +// CHECK1-NEXT: [[TMP77:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP77]] to i32 +// CHECK1-NEXT: [[TMP78:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV16:%.*]] = sext i8 [[TMP78]] to i32 // CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV]], [[CONV16]] // CHECK1-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 // CHECK1-NEXT: store i8 [[CONV18]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP82]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP76]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done21: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP77]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP72]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP85:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP86:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP85]] monotonic, align 4 -// CHECK1-NEXT: [[TMP87:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY22:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP87]] +// CHECK1-NEXT: [[TMP79:%.*]] = load i32, ptr [[ARGC1]], align 4 +// CHECK1-NEXT: [[TMP80:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP79]] monotonic, align 4 +// CHECK1-NEXT: [[TMP81:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY22:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP81]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY22]], label [[OMP_ARRAYCPY_DONE35:%.*]], label [[OMP_ARRAYCPY_BODY23:%.*]] // CHECK1: omp.arraycpy.body23: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST24:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT33:%.*]], [[ATOMIC_EXIT:%.*]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST25:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT32:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP88:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 -// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP88]] to i32 +// CHECK1-NEXT: [[TMP82:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 +// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP82]] to i32 // CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP89:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY23]] ], [ [[TMP94:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP89]], ptr [[_TMP27]], align 1 -// CHECK1-NEXT: [[TMP90:%.*]] = load i8, ptr [[_TMP27]], align 1 -// CHECK1-NEXT: [[CONV28:%.*]] = sext i8 [[TMP90]] to i32 -// CHECK1-NEXT: [[TMP91:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 -// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP91]] to i32 +// CHECK1-NEXT: [[TMP83:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY23]] ], [ [[TMP88:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP83]], ptr [[_TMP27]], align 1 +// CHECK1-NEXT: [[TMP84:%.*]] = load i8, ptr [[_TMP27]], align 1 +// CHECK1-NEXT: [[CONV28:%.*]] = sext i8 [[TMP84]] to i32 +// CHECK1-NEXT: [[TMP85:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 +// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP85]] to i32 // CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV28]], [[CONV29]] // CHECK1-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8 // CHECK1-NEXT: store i8 [[CONV31]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP92:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP93:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i8 [[TMP89]], i8 [[TMP92]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP94]] = extractvalue { i8, i1 } [[TMP93]], 0 -// CHECK1-NEXT: [[TMP95:%.*]] = extractvalue { i8, i1 } [[TMP93]], 1 -// CHECK1-NEXT: br i1 [[TMP95]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP86:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP87:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i8 [[TMP83]], i8 [[TMP86]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP88]] = extractvalue { i8, i1 } [[TMP87]], 0 +// CHECK1-NEXT: [[TMP89:%.*]] = extractvalue { i8, i1 } [[TMP87]], 1 +// CHECK1-NEXT: br i1 [[TMP89]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT32]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT33]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE34:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP87]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE34:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP81]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_BODY23]] // CHECK1: omp.arraycpy.done35: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP96:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP96]]) +// CHECK1-NEXT: [[TMP90:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP90]]) // CHECK1-NEXT: ret void // // @@ -315,8 +315,8 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -327,12 +327,12 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -477,198 +477,198 @@ // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_]], i64 0, i64 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init..4, ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb..5, ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP32]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init..4, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb..5, ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP30]], i8 0, i64 4, i1 false) // CHECK1-NEXT: [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds ptr, ptr [[TMP35]], i64 0 -// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP36]], i64 0 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = sext i32 [[TMP37]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP38]] -// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr [[TMP39]], i64 9 -// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, ptr [[TMP40]], i64 [[LB_ADD_LEN10]] -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP34]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP42:%.*]] = ptrtoint ptr [[ARRAYIDX12]] to i64 -// CHECK1-NEXT: [[TMP43:%.*]] = ptrtoint ptr [[ARRAYIDX9]] to i64 -// CHECK1-NEXT: [[TMP44:%.*]] = sub i64 [[TMP42]], [[TMP43]] -// CHECK1-NEXT: [[TMP45:%.*]] = sdiv exact i64 [[TMP44]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP46:%.*]] = add nuw i64 [[TMP45]], 1 -// CHECK1-NEXT: [[TMP47:%.*]] = mul nuw i64 [[TMP46]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP47]], ptr [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init..6, ptr [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb..7, ptr [[TMP51]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 6 -// CHECK1-NEXT: store i32 1, ptr [[TMP52]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds ptr, ptr [[TMP32]], i64 0 +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[ARRAYIDX8]], align 8 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP33]], i64 0 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP35]] +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr [[TMP36]], i64 9 +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[ARRAYIDX11]], align 8 +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, ptr [[TMP37]], i64 [[LB_ADD_LEN10]] +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[ARRAYIDX12]] to i64 +// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[ARRAYIDX9]] to i64 +// CHECK1-NEXT: [[TMP41:%.*]] = sub i64 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: [[TMP42:%.*]] = sdiv exact i64 [[TMP41]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP43:%.*]] = add nuw i64 [[TMP42]], 1 +// CHECK1-NEXT: [[TMP44:%.*]] = mul nuw i64 [[TMP43]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP44]], ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init..6, ptr [[TMP46]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP47]], align 8 +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb..7, ptr [[TMP48]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 6 +// CHECK1-NEXT: store i32 1, ptr [[TMP49]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[TMP50]], align 4 +// CHECK1-NEXT: [[TMP52:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP51]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) +// CHECK1-NEXT: store ptr [[TMP52]], ptr [[DOTTASK_RED_]], align 8 // CHECK1-NEXT: [[TMP53:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[TMP53]], align 4 -// CHECK1-NEXT: [[TMP56:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP54]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) -// CHECK1-NEXT: store ptr [[TMP56]], ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP58:%.*]] = load i32, ptr [[TMP57]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3:[0-9]+]], i32 [[TMP58]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP59]], 9 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3:[0-9]+]], i32 [[TMP54]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP55:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP55]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP60]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP56]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP61]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP57]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i64 [[TMP62]], [[TMP63]] +// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i64 [[TMP58]], [[TMP59]] // CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP64:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP64]], 1 +// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP60]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL]] // CHECK1-NEXT: store i64 [[ADD]], ptr [[I]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP65]], align 8 -// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP66]], align 8 -// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP68:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP68]], ptr [[TMP67]], align 8 -// CHECK1-NEXT: [[TMP69:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP70:%.*]] = load i32, ptr [[TMP69]], align 4 -// CHECK1-NEXT: [[TMP71:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP70]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) -// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP71]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP73]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP75:%.*]] = load ptr, ptr [[TMP74]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP75]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP71]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP77]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP79:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: store ptr [[TMP79]], ptr [[TMP78]], align 8 -// CHECK1-NEXT: [[TMP80:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP81:%.*]] = load i32, ptr [[TMP80]], align 4 -// CHECK1-NEXT: [[TMP82:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP81]], ptr [[TMP71]]) +// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP61]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP62]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP64:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK1-NEXT: store ptr [[TMP64]], ptr [[TMP63]], align 8 +// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP66:%.*]] = load i32, ptr [[TMP65]], align 4 +// CHECK1-NEXT: [[TMP67:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP66]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) +// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP67]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP68]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP70:%.*]] = load ptr, ptr [[TMP69]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP70]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP67]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP71]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP73:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: store ptr [[TMP73]], ptr [[TMP72]], align 8 +// CHECK1-NEXT: [[TMP74:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP75:%.*]] = load i32, ptr [[TMP74]], align 4 +// CHECK1-NEXT: [[TMP76:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP75]], ptr [[TMP67]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP83:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP83]], 1 +// CHECK1-NEXT: [[TMP77:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP77]], 1 // CHECK1-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP84:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP85:%.*]] = load i32, ptr [[TMP84]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP85]]) +// CHECK1-NEXT: [[TMP78:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP79:%.*]] = load i32, ptr [[TMP78]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP79]]) +// CHECK1-NEXT: [[TMP80:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP81:%.*]] = load i32, ptr [[TMP80]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP81]], i32 1) +// CHECK1-NEXT: [[TMP82:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP82]], align 8 +// CHECK1-NEXT: [[TMP83:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP83]], align 8 +// CHECK1-NEXT: [[TMP84:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP85:%.*]] = inttoptr i64 [[TMP13]] to ptr +// CHECK1-NEXT: store ptr [[TMP85]], ptr [[TMP84]], align 8 // CHECK1-NEXT: [[TMP86:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP87:%.*]] = load i32, ptr [[TMP86]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP87]], i32 1) -// CHECK1-NEXT: [[TMP88:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP88]], align 8 -// CHECK1-NEXT: [[TMP90:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP90]], align 8 -// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP92:%.*]] = inttoptr i64 [[TMP13]] to ptr -// CHECK1-NEXT: store ptr [[TMP92]], ptr [[TMP91]], align 8 -// CHECK1-NEXT: [[TMP93:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP94:%.*]] = load i32, ptr [[TMP93]], align 4 -// CHECK1-NEXT: [[TMP96:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP94]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP96]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP88:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP87]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP88]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP97:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP98:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP97]], [[TMP98]] +// CHECK1-NEXT: [[TMP89:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP90:%.*]] = load i32, ptr [[ARGC1]], align 4 +// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP89]], [[TMP90]] // CHECK1-NEXT: store i32 [[ADD15]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP99:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP99]] +// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP13]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP91]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE22:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP100:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP100]] to i32 -// CHECK1-NEXT: [[TMP101:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP101]] to i32 +// CHECK1-NEXT: [[TMP92:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP92]] to i32 +// CHECK1-NEXT: [[TMP93:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP93]] to i32 // CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV]], [[CONV17]] // CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 // CHECK1-NEXT: store i8 [[CONV19]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP99]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP91]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done22: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP94]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP87]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP102:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP103:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP102]] monotonic, align 4 -// CHECK1-NEXT: [[TMP104:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY23:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP104]] +// CHECK1-NEXT: [[TMP94:%.*]] = load i32, ptr [[ARGC1]], align 4 +// CHECK1-NEXT: [[TMP95:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP94]] monotonic, align 4 +// CHECK1-NEXT: [[TMP96:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP13]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY23:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP96]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY23]], label [[OMP_ARRAYCPY_DONE36:%.*]], label [[OMP_ARRAYCPY_BODY24:%.*]] // CHECK1: omp.arraycpy.body24: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST25:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT34:%.*]], [[ATOMIC_EXIT:%.*]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST26:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT33:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP105:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV27:%.*]] = sext i8 [[TMP105]] to i32 +// CHECK1-NEXT: [[TMP97:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 +// CHECK1-NEXT: [[CONV27:%.*]] = sext i8 [[TMP97]] to i32 // CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP106:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY24]] ], [ [[TMP111:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP106]], ptr [[_TMP28]], align 1 -// CHECK1-NEXT: [[TMP107:%.*]] = load i8, ptr [[_TMP28]], align 1 -// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP107]] to i32 -// CHECK1-NEXT: [[TMP108:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV30:%.*]] = sext i8 [[TMP108]] to i32 +// CHECK1-NEXT: [[TMP98:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY24]] ], [ [[TMP103:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP98]], ptr [[_TMP28]], align 1 +// CHECK1-NEXT: [[TMP99:%.*]] = load i8, ptr [[_TMP28]], align 1 +// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP99]] to i32 +// CHECK1-NEXT: [[TMP100:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 +// CHECK1-NEXT: [[CONV30:%.*]] = sext i8 [[TMP100]] to i32 // CHECK1-NEXT: [[ADD31:%.*]] = add nsw i32 [[CONV29]], [[CONV30]] // CHECK1-NEXT: [[CONV32:%.*]] = trunc i32 [[ADD31]] to i8 // CHECK1-NEXT: store i8 [[CONV32]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP109:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP110:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i8 [[TMP106]], i8 [[TMP109]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP111]] = extractvalue { i8, i1 } [[TMP110]], 0 -// CHECK1-NEXT: [[TMP112:%.*]] = extractvalue { i8, i1 } [[TMP110]], 1 -// CHECK1-NEXT: br i1 [[TMP112]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP101:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP102:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i8 [[TMP98]], i8 [[TMP101]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP103]] = extractvalue { i8, i1 } [[TMP102]], 0 +// CHECK1-NEXT: [[TMP104:%.*]] = extractvalue { i8, i1 } [[TMP102]], 1 +// CHECK1-NEXT: br i1 [[TMP104]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT33]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT34]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE35:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT33]], [[TMP104]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE35:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT33]], [[TMP96]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_DONE36]], label [[OMP_ARRAYCPY_BODY24]] // CHECK1: omp.arraycpy.done36: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP113:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP113]]) +// CHECK1-NEXT: [[TMP105:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP105]]) // CHECK1-NEXT: ret void // // @@ -679,8 +679,8 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -691,12 +691,12 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -791,59 +791,59 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: store ptr [[TMP9]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: call void [[TMP13]](ptr [[TMP14]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 -// CHECK1-NEXT: [[TMP22:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], ptr [[TMP19]], ptr [[TMP18]]) -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP29]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP31]], i64 9 -// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP32]], i64 [[LB_ADD_LEN_I]] -// CHECK1-NEXT: [[TMP33:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 -// CHECK1-NEXT: [[TMP34:%.*]] = ptrtoint ptr [[TMP25]] to i64 -// CHECK1-NEXT: [[TMP35:%.*]] = sub i64 [[TMP33]], [[TMP34]] -// CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1 -// CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: store i64 [[TMP37]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 -// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], ptr [[TMP39]], ptr [[TMP25]]) -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint ptr [[TMP43]] to i64 -// CHECK1-NEXT: [[TMP45:%.*]] = ptrtoint ptr [[TMP25]] to i64 -// CHECK1-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]] -// CHECK1-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[TMP40]], i64 [[TMP47]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK1-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP15]], ptr [[TMP14]]) +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]] +// CHECK1-NEXT: [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 +// CHECK1-NEXT: [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]] +// CHECK1-NEXT: [[TMP31:%.*]] = sdiv exact i64 [[TMP30]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP32:%.*]] = add nuw i64 [[TMP31]], 1 +// CHECK1-NEXT: [[TMP33:%.*]] = mul nuw i64 [[TMP32]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: store i64 [[TMP32]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP34]], ptr [[TMP20]]) +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[TMP38]] to i64 +// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP41:%.*]] = sub i64 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: [[TMP42:%.*]] = sdiv exact i64 [[TMP41]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[TMP35]], i64 [[TMP42]] // CHECK1-NEXT: store ptr [[TMP4_I]], ptr [[TMP_I]], align 8, !noalias !12 -// CHECK1-NEXT: store ptr [[TMP48]], ptr [[TMP4_I]], align 8, !noalias !12 +// CHECK1-NEXT: store ptr [[TMP43]], ptr [[TMP4_I]], align 8, !noalias !12 // CHECK1-NEXT: ret i32 0 // // @@ -855,38 +855,38 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP15]], i64 [[TMP18]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP15]], [[TMP21]] +// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP11]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP11]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP22]] to i32 -// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP23]] to i32 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP18]] to i32 +// CHECK1-NEXT: [[TMP19:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP19]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[CONV2]] // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8 // CHECK1-NEXT: store i8 [[CONV4]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP21]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done5: // CHECK1-NEXT: ret void @@ -900,38 +900,38 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP15]], i64 [[TMP18]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP15]], [[TMP21]] +// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP11]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP11]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP22]] to i32 -// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP23]] to i32 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP18]] to i32 +// CHECK1-NEXT: [[TMP19:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP19]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[CONV2]] // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8 // CHECK1-NEXT: store i8 [[CONV4]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP21]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done5: // CHECK1-NEXT: ret void Index: clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp =================================================================== --- clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp +++ clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp @@ -121,34 +121,34 @@ // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[A]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 56088, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 56088, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -156,8 +156,8 @@ // CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A3]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 -// CHECK1-NEXT: ret i32 [[TMP18]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 +// CHECK1-NEXT: ret i32 [[TMP16]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28 @@ -211,22 +211,22 @@ // CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group !4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -294,39 +294,39 @@ // CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 456 // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 456 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] // CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] -// CHECK1-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] -// CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX9]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -372,34 +372,34 @@ // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[A]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 56088, ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 56088, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -407,8 +407,8 @@ // CHECK3-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A3]], i32 0, i32 0 // CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 -// CHECK3-NEXT: ret i32 [[TMP18]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 +// CHECK3-NEXT: ret i32 [[TMP16]] // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28 @@ -462,20 +462,20 @@ // CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group !5 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -541,37 +541,37 @@ // CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK3-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] -// CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -622,39 +622,39 @@ // CHECK5-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2:![0-9]+]] +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP3]], 456 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP5]], 456 // CHECK5-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 // CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], [[MUL4]] // CHECK5-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK5-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP6]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP7]] to i64 // CHECK5-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]] -// CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK5-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 @@ -693,37 +693,37 @@ // CHECK7-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP3]], 456 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !3 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP5]], 456 // CHECK7-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 // CHECK7-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], [[MUL4]] // CHECK7-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK7-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP6]] -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP7]] -// CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK7-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 123, ptr [[I]], align 4 @@ -753,10 +753,10 @@ // CHECK9-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8 // CHECK9-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK9-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK9-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 @@ -781,91 +781,91 @@ // CHECK9-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP1]], [[TMP3]] // CHECK9-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes, i64 40, i1 false) -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 // CHECK9-NEXT: store ptr null, ptr [[TMP17]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP18]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP20]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK9-NEXT: store ptr null, ptr [[TMP22]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP23]], align 8 -// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP25]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP18]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP19]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK9-NEXT: store ptr null, ptr [[TMP20]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP21]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP22]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK9-NEXT: store ptr null, ptr [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 // CHECK9-NEXT: store ptr null, ptr [[TMP27]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP28]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP30]], align 8 -// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 -// CHECK9-NEXT: store ptr null, ptr [[TMP32]], align 8 -// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP33]], align 8 -// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP35]], align 8 -// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 -// CHECK9-NEXT: store i64 [[TMP11]], ptr [[TMP37]], align 8 -// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 -// CHECK9-NEXT: store ptr null, ptr [[TMP38]], align 8 -// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[N]], align 4 -// CHECK9-NEXT: store i32 [[TMP42]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[M]], align 4 -// CHECK9-NEXT: store i32 [[TMP43]], ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP32]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[CONV5:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP45]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV5]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_4]], align 8 -// CHECK9-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP46]], 1 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP35]], 1 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP47]], align 4 -// CHECK9-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 5, ptr [[TMP48]], align 4 -// CHECK9-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[TMP39]], ptr [[TMP49]], align 8 -// CHECK9-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK9-NEXT: store ptr [[TMP40]], ptr [[TMP50]], align 8 -// CHECK9-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr [[TMP41]], ptr [[TMP51]], align 8 -// CHECK9-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP52]], align 8 -// CHECK9-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK9-NEXT: store ptr null, ptr [[TMP53]], align 8 -// CHECK9-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP54]], align 8 -// CHECK9-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 [[ADD]], ptr [[TMP55]], align 8 -// CHECK9-NEXT: [[TMP56:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP57:%.*]] = icmp ne i32 [[TMP56]], 0 -// CHECK9-NEXT: br i1 [[TMP57]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP36]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 5, ptr [[TMP37]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP28]], ptr [[TMP38]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP29]], ptr [[TMP39]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP30]], ptr [[TMP40]], align 8 +// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP41]], align 8 +// CHECK9-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: store ptr null, ptr [[TMP42]], align 8 +// CHECK9-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK9-NEXT: store ptr null, ptr [[TMP43]], align 8 +// CHECK9-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 [[ADD]], ptr [[TMP44]], align 8 +// CHECK9-NEXT: [[TMP45:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK9-NEXT: br i1 [[TMP46]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81(i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP1]], i64 [[TMP3]], ptr [[VLA]]) #[[ATTR3:[0-9]+]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: -// CHECK9-NEXT: [[TMP58:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP58]]) +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP47]]) // CHECK9-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 -// CHECK9-NEXT: [[TMP59:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK9-NEXT: call void @llvm.stackrestore(ptr [[TMP59]]) -// CHECK9-NEXT: [[TMP60:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: ret i32 [[TMP60]] +// CHECK9-NEXT: [[TMP48:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK9-NEXT: call void @llvm.stackrestore(ptr [[TMP48]]) +// CHECK9-NEXT: [[TMP49:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK9-NEXT: ret i32 [[TMP49]] // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81 @@ -908,18 +908,18 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I13:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J14:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[M_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 @@ -935,30 +935,30 @@ // CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_4]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 -// CHECK9-NEXT: [[CONV10:%.*]] = sext i32 [[DIV9]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV7]], [[CONV10]] -// CHECK9-NEXT: [[SUB11:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB11]], ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] +// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 // CHECK9-NEXT: store i32 0, ptr [[J]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[CMP12:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 // CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 8 // CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 @@ -966,11 +966,11 @@ // CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) // CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK9-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: // CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 @@ -982,26 +982,26 @@ // CHECK9-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] -// CHECK9-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[M_ADDR]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: store i32 [[TMP23]], ptr [[M_CASTED]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[M_CASTED]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i64 [[TMP19]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]), !llvm.access.group !5 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[M_ADDR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: store i32 [[TMP23]], ptr [[M_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[M_CASTED]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i64 [[TMP19]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8, !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] -// CHECK9-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1014,17 +1014,17 @@ // CHECK9-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK9-NEXT: [[MUL17:%.*]] = mul nsw i32 [[DIV16]], 1 +// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 0, [[MUL17]] +// CHECK9-NEXT: store i32 [[ADD18]], ptr [[I11]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK9-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 // CHECK9-NEXT: [[MUL21:%.*]] = mul nsw i32 [[DIV20]], 1 // CHECK9-NEXT: [[ADD22:%.*]] = add nsw i32 0, [[MUL21]] -// CHECK9-NEXT: store i32 [[ADD22]], ptr [[I13]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB23:%.*]] = sub nsw i32 [[TMP32]], 0 -// CHECK9-NEXT: [[DIV24:%.*]] = sdiv i32 [[SUB23]], 1 -// CHECK9-NEXT: [[MUL25:%.*]] = mul nsw i32 [[DIV24]], 1 -// CHECK9-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK9-NEXT: store i32 [[ADD26]], ptr [[J14]], align 4 +// CHECK9-NEXT: store i32 [[ADD22]], ptr [[J12]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -1046,18 +1046,18 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I13:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J14:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 @@ -1073,30 +1073,30 @@ // CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_4]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 -// CHECK9-NEXT: [[CONV10:%.*]] = sext i32 [[DIV9]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV7]], [[CONV10]] -// CHECK9-NEXT: [[SUB11:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB11]], ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] +// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 // CHECK9-NEXT: store i32 0, ptr [[J]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[CMP12:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 // CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 // CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 @@ -1108,11 +1108,11 @@ // CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) // CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK9-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: // CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 @@ -1124,56 +1124,56 @@ // CHECK9-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK9-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] +// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP22]], 0 -// CHECK9-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 -// CHECK9-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] -// CHECK9-NEXT: [[CONV20:%.*]] = sext i32 [[MUL19]] to i64 -// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i64 [[TMP21]], [[CONV20]] -// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]] -// CHECK9-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK9-NEXT: store i32 [[CONV23]], ptr [[I13]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK9-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 -// CHECK9-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] -// CHECK9-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 -// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP24]], [[CONV27]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP26]], 0 -// CHECK9-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 -// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] -// CHECK9-NEXT: [[CONV32:%.*]] = sext i32 [[MUL31]] to i64 -// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[DIV28]], [[CONV32]] -// CHECK9-NEXT: [[SUB34:%.*]] = sub nsw i64 [[TMP23]], [[MUL33]] -// CHECK9-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 -// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] -// CHECK9-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 -// CHECK9-NEXT: store i32 [[CONV37]], ptr [[J14]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I13]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP22]], 0 +// CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK9-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] +// CHECK9-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 +// CHECK9-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP21]], [[CONV18]] +// CHECK9-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] +// CHECK9-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK9-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK9-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 +// CHECK9-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] +// CHECK9-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 +// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP24]], [[CONV25]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 +// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] +// CHECK9-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 +// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] +// CHECK9-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP23]], [[MUL31]] +// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 +// CHECK9-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] +// CHECK9-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 +// CHECK9-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 // CHECK9-NEXT: [[TMP28:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP1]] // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP28]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[J14]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK9-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM38]] -// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX39]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK9-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM36]] +// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX37]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[ADD40:%.*]] = add nsw i64 [[TMP30]], 1 -// CHECK9-NEXT: store i64 [[ADD40]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP30]], 1 +// CHECK9-NEXT: store i64 [[ADD38]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1186,17 +1186,17 @@ // CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB41:%.*]] = sub nsw i32 [[TMP35]], 0 -// CHECK9-NEXT: [[DIV42:%.*]] = sdiv i32 [[SUB41]], 1 -// CHECK9-NEXT: [[MUL43:%.*]] = mul nsw i32 [[DIV42]], 1 -// CHECK9-NEXT: [[ADD44:%.*]] = add nsw i32 0, [[MUL43]] -// CHECK9-NEXT: store i32 [[ADD44]], ptr [[I13]], align 4 -// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB45:%.*]] = sub nsw i32 [[TMP36]], 0 -// CHECK9-NEXT: [[DIV46:%.*]] = sdiv i32 [[SUB45]], 1 -// CHECK9-NEXT: [[MUL47:%.*]] = mul nsw i32 [[DIV46]], 1 -// CHECK9-NEXT: [[ADD48:%.*]] = add nsw i32 0, [[MUL47]] -// CHECK9-NEXT: store i32 [[ADD48]], ptr [[J14]], align 4 +// CHECK9-NEXT: [[SUB39:%.*]] = sub nsw i32 [[TMP35]], 0 +// CHECK9-NEXT: [[DIV40:%.*]] = sdiv i32 [[SUB39]], 1 +// CHECK9-NEXT: [[MUL41:%.*]] = mul nsw i32 [[DIV40]], 1 +// CHECK9-NEXT: [[ADD42:%.*]] = add nsw i32 0, [[MUL41]] +// CHECK9-NEXT: store i32 [[ADD42]], ptr [[I11]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[SUB43:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK9-NEXT: [[DIV44:%.*]] = sdiv i32 [[SUB43]], 1 +// CHECK9-NEXT: [[MUL45:%.*]] = mul nsw i32 [[DIV44]], 1 +// CHECK9-NEXT: [[ADD46:%.*]] = add nsw i32 0, [[MUL45]] +// CHECK9-NEXT: store i32 [[ADD46]], ptr [[J12]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -1217,34 +1217,34 @@ // CHECK9-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: store ptr [[A]], ptr [[TMP0]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[A]], ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK9-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[A]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr @.offload_sizes.4, ptr [[TMP11]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK9-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP12]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK9-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 20, ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr @.offload_sizes.4, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK9-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 20, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK9-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68(ptr [[A]]) #[[ATTR3]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1303,22 +1303,22 @@ // CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group !14 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1386,38 +1386,38 @@ // CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK9-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 2 // CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 2 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] // CHECK9-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] -// CHECK9-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 // CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] -// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX9]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK9-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1486,91 +1486,91 @@ // CHECK11-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4 // CHECK11-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes, i32 40, i1 false) -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP12]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 // CHECK11-NEXT: store ptr null, ptr [[TMP16]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP17]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP19]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK11-NEXT: store ptr null, ptr [[TMP21]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP22]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP24]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP18]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr null, ptr [[TMP19]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP20]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP21]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr null, ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK11-NEXT: store i64 [[TMP10]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 // CHECK11-NEXT: store ptr null, ptr [[TMP26]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP27]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP29]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr null, ptr [[TMP31]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP34]], align 4 -// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 -// CHECK11-NEXT: store i64 [[TMP10]], ptr [[TMP36]], align 4 -// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr null, ptr [[TMP37]], align 4 -// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[N]], align 4 -// CHECK11-NEXT: store i32 [[TMP41]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[M]], align 4 -// CHECK11-NEXT: store i32 [[TMP42]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP43]], 0 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP30]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 // CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] // CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 // CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK11-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP45]], 1 +// CHECK11-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP34]], 1 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP46]], align 4 -// CHECK11-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 5, ptr [[TMP47]], align 4 -// CHECK11-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[TMP38]], ptr [[TMP48]], align 4 -// CHECK11-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr [[TMP39]], ptr [[TMP49]], align 4 -// CHECK11-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr [[TMP40]], ptr [[TMP50]], align 4 -// CHECK11-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP51]], align 4 -// CHECK11-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK11-NEXT: store ptr null, ptr [[TMP52]], align 4 -// CHECK11-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP53]], align 4 -// CHECK11-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 [[ADD]], ptr [[TMP54]], align 8 -// CHECK11-NEXT: [[TMP55:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP56:%.*]] = icmp ne i32 [[TMP55]], 0 -// CHECK11-NEXT: br i1 [[TMP56]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 1, ptr [[TMP35]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 5, ptr [[TMP36]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP27]], ptr [[TMP37]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP28]], ptr [[TMP38]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP29]], ptr [[TMP39]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP40]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: store ptr null, ptr [[TMP41]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK11-NEXT: store ptr null, ptr [[TMP42]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 [[ADD]], ptr [[TMP43]], align 8 +// CHECK11-NEXT: [[TMP44:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK11-NEXT: br i1 [[TMP45]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81(i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP0]], i32 [[TMP1]], ptr [[VLA]]) #[[ATTR3:[0-9]+]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: -// CHECK11-NEXT: [[TMP57:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP57]]) +// CHECK11-NEXT: [[TMP46:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP46]]) // CHECK11-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 -// CHECK11-NEXT: [[TMP58:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 -// CHECK11-NEXT: call void @llvm.stackrestore(ptr [[TMP58]]) -// CHECK11-NEXT: [[TMP59:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: ret i32 [[TMP59]] +// CHECK11-NEXT: [[TMP47:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 +// CHECK11-NEXT: call void @llvm.stackrestore(ptr [[TMP47]]) +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK11-NEXT: ret i32 [[TMP48]] // // // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81 @@ -1687,28 +1687,28 @@ // CHECK11-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] // CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i32 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: store i32 [[TMP23]], ptr [[N_CASTED]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[M_ADDR]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: store i32 [[TMP25]], ptr [[M_CASTED]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[M_CASTED]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i32 [[TMP20]], i32 [[TMP22]], i32 [[TMP24]], i32 [[TMP26]], i32 [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]), !llvm.access.group !6 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[M_ADDR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[M_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[M_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i32 [[TMP20]], i32 [[TMP22]], i32 [[TMP24]], i32 [[TMP26]], i32 [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP27]], [[TMP28]] -// CHECK11-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1833,13 +1833,13 @@ // CHECK11-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP22]], 0 // CHECK11-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 // CHECK11-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] @@ -1848,16 +1848,16 @@ // CHECK11-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]] // CHECK11-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK11-NEXT: store i32 [[CONV23]], ptr [[I13]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: store i32 [[CONV23]], ptr [[I13]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP25]], 0 // CHECK11-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 // CHECK11-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] // CHECK11-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 // CHECK11-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP24]], [[CONV27]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP26]], 0 // CHECK11-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 // CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] @@ -1867,20 +1867,20 @@ // CHECK11-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 // CHECK11-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] // CHECK11-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 -// CHECK11-NEXT: store i32 [[CONV37]], ptr [[J14]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I13]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: store i32 [[CONV37]], ptr [[J14]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I13]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: [[TMP28:%.*]] = mul nsw i32 [[TMP27]], [[TMP1]] // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP28]] -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[J14]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[J14]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: [[ARRAYIDX38:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP29]] -// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX38]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX38]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP30]], 1 -// CHECK11-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK11-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1924,34 +1924,34 @@ // CHECK11-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: store ptr [[A]], ptr [[TMP0]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: store ptr [[A]], ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK11-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[A]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr @.offload_sizes.4, ptr [[TMP11]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK11-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP12]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 20, ptr [[TMP15]], align 8 -// CHECK11-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK11-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr @.offload_sizes.4, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK11-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 20, ptr [[TMP13]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK11-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68(ptr [[A]]) #[[ATTR3]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -2010,20 +2010,20 @@ // CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group !15 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2089,36 +2089,36 @@ // CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 // CHECK11-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 // CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK11-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK11-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] -// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2212,13 +2212,13 @@ // CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_IV]], align 8 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2:![0-9]+]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[CMP11:%.*]] = icmp sle i64 [[TMP14]], [[TMP15]] // CHECK13-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP17]], 0 // CHECK13-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK13-NEXT: [[MUL14:%.*]] = mul nsw i32 1, [[DIV13]] @@ -2227,16 +2227,16 @@ // CHECK13-NEXT: [[MUL17:%.*]] = mul nsw i64 [[DIV16]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL17]] // CHECK13-NEXT: [[CONV18:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK13-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP20]], 0 // CHECK13-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 // CHECK13-NEXT: [[MUL21:%.*]] = mul nsw i32 1, [[DIV20]] // CHECK13-NEXT: [[CONV22:%.*]] = sext i32 [[MUL21]] to i64 // CHECK13-NEXT: [[DIV23:%.*]] = sdiv i64 [[TMP19]], [[CONV22]] -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP21]], 0 // CHECK13-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 // CHECK13-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] @@ -2246,22 +2246,22 @@ // CHECK13-NEXT: [[MUL30:%.*]] = mul nsw i64 [[SUB29]], 1 // CHECK13-NEXT: [[ADD31:%.*]] = add nsw i64 0, [[MUL30]] // CHECK13-NEXT: [[CONV32:%.*]] = trunc i64 [[ADD31]] to i32 -// CHECK13-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 // CHECK13-NEXT: [[TMP23:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP3]] // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP23]] -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[IDXPROM33:%.*]] = sext i32 [[TMP24]] to i64 // CHECK13-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM33]] -// CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX34]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX34]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[ADD35:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK13-NEXT: store i64 [[ADD35]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2 +// CHECK13-NEXT: store i64 [[ADD35]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 @@ -2306,38 +2306,38 @@ // CHECK13-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP3]], 2 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP5]], 2 // CHECK13-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 2 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], [[MUL3]] // CHECK13-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK13-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] -// CHECK13-NEXT: store i32 [[ADD5]], ptr [[J]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: store i32 [[ADD5]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP6]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP7]] to i64 // CHECK13-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM6]] -// CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 @@ -2413,13 +2413,13 @@ // CHECK15-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3:![0-9]+]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[CMP11:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK15-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP15]], 0 // CHECK15-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK15-NEXT: [[MUL14:%.*]] = mul nsw i32 1, [[DIV13]] @@ -2428,16 +2428,16 @@ // CHECK15-NEXT: [[MUL17:%.*]] = mul nsw i64 [[DIV16]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL17]] // CHECK15-NEXT: [[CONV18:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK15-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP18]], 0 // CHECK15-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 // CHECK15-NEXT: [[MUL21:%.*]] = mul nsw i32 1, [[DIV20]] // CHECK15-NEXT: [[CONV22:%.*]] = sext i32 [[MUL21]] to i64 // CHECK15-NEXT: [[DIV23:%.*]] = sdiv i64 [[TMP17]], [[CONV22]] -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP19]], 0 // CHECK15-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 // CHECK15-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] @@ -2447,20 +2447,20 @@ // CHECK15-NEXT: [[MUL30:%.*]] = mul nsw i64 [[SUB29]], 1 // CHECK15-NEXT: [[ADD31:%.*]] = add nsw i64 0, [[MUL30]] // CHECK15-NEXT: [[CONV32:%.*]] = trunc i64 [[ADD31]] to i32 -// CHECK15-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[TMP21:%.*]] = mul nsw i32 [[TMP20]], [[TMP1]] // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i32 [[TMP21]] -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP22]] -// CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX33]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX33]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[ADD34:%.*]] = add nsw i64 [[TMP23]], 1 -// CHECK15-NEXT: store i64 [[ADD34]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3 +// CHECK15-NEXT: store i64 [[ADD34]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 @@ -2505,36 +2505,36 @@ // CHECK15-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP3]], 2 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP5]], 2 // CHECK15-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 2 // CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], [[MUL3]] // CHECK15-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] -// CHECK15-NEXT: store i32 [[ADD5]], ptr [[J]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 [[ADD5]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[A]], i32 0, i32 [[TMP6]] -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP7]] -// CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX6]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK15-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 10, ptr [[I]], align 4 Index: clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp =================================================================== --- clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp +++ clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp @@ -154,23 +154,23 @@ // CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 4), !llvm.access.group !6 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 4), !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !6 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -232,22 +232,22 @@ // CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -306,23 +306,23 @@ // CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 3), !llvm.access.group !15 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 3), !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -384,22 +384,22 @@ // CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -491,23 +491,23 @@ // CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group !21 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !21 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -569,22 +569,22 @@ // CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -628,22 +628,22 @@ // CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !2 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2:![0-9]+]] +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !2 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK3-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK3-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: store i32 1000, ptr [[I]], align 4 @@ -653,22 +653,22 @@ // CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV5]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK3: omp.inner.for.cond7: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV5]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB4]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV5]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB4]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK3: omp.inner.for.body9: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV5]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK3-NEXT: store i32 [[ADD11]], ptr [[I6]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[ADD11]], ptr [[I6]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK3: omp.body.continue12: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK3: omp.inner.for.inc13: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV5]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK3-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV5]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end15: // CHECK3-NEXT: store i32 1000, ptr [[I6]], align 4 @@ -690,22 +690,22 @@ // CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK3-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: store i32 1000, ptr [[I]], align 4 Index: clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp =================================================================== --- clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp +++ clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp @@ -101,34 +101,34 @@ // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 2, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 2, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66(ptr @_ZZ4mainE5sivar) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -189,22 +189,22 @@ // CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[SIVAR1]]), !llvm.access.group !5 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[SIVAR1]]), !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -219,21 +219,21 @@ // CHECK1: .omp.final.done: // CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[SIVAR1]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 // CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP22]] monotonic, align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -291,26 +291,26 @@ // CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR2]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR2]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[SIVAR2]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[SIVAR2]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -325,21 +325,21 @@ // CHECK1: .omp.final.done: // CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[SIVAR2]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR2]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 // CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP22]] monotonic, align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR2]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -353,15 +353,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -373,15 +373,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -396,36 +396,36 @@ // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.7, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 2, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.7, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 2, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(ptr [[T_VAR]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -485,22 +485,22 @@ // CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..4, i64 [[TMP9]], i64 [[TMP11]], ptr [[T_VAR1]]), !llvm.access.group !14 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..4, i64 [[TMP9]], i64 [[TMP11]], ptr [[T_VAR1]]), !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -515,21 +515,21 @@ // CHECK1: .omp.final.done: // CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 // CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP22]] monotonic, align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -587,26 +587,26 @@ // CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[T_VAR2]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -621,21 +621,21 @@ // CHECK1: .omp.final.done: // CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[T_VAR2]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 // CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP22]] monotonic, align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -649,15 +649,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -669,15 +669,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -699,34 +699,34 @@ // CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 2, ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 2, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66(ptr @_ZZ4mainE5sivar) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -787,20 +787,20 @@ // CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[SIVAR1]]), !llvm.access.group !6 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[SIVAR1]]), !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -815,21 +815,21 @@ // CHECK3: .omp.final.done: // CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 // CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -885,26 +885,26 @@ // CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -919,21 +919,21 @@ // CHECK3: .omp.final.done: // CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 // CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP22]] monotonic, align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -947,15 +947,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -967,15 +967,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -990,36 +990,36 @@ // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.7, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 2, ptr [[TMP16]], align 8 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.7, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 2, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(ptr [[T_VAR]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1079,20 +1079,20 @@ // CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..4, i32 [[TMP8]], i32 [[TMP9]], ptr [[T_VAR1]]), !llvm.access.group !15 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..4, i32 [[TMP8]], i32 [[TMP9]], ptr [[T_VAR1]]), !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1107,21 +1107,21 @@ // CHECK3: .omp.final.done: // CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 // CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -1177,26 +1177,26 @@ // CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1211,21 +1211,21 @@ // CHECK3: .omp.final.done: // CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 // CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP22]] monotonic, align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -1239,15 +1239,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -1259,15 +1259,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -1340,22 +1340,22 @@ // CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[SIVAR1]]), !llvm.access.group !4 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[SIVAR1]]), !llvm.access.group [[ACC_GRP4]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1370,21 +1370,21 @@ // CHECK5: .omp.final.done: // CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK5-NEXT: store ptr [[SIVAR1]], ptr [[TMP16]], align 8 -// CHECK5-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK5-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.reduction.case1: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK5-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 // CHECK5-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.case2: -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK5-NEXT: [[TMP23:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP22]] monotonic, align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.default: // CHECK5-NEXT: ret void @@ -1443,29 +1443,29 @@ // CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR2]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR2]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK5-NEXT: store i32 [[ADD4]], ptr [[SIVAR2]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: store i32 [[ADD4]], ptr [[SIVAR2]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK5-NEXT: store ptr [[SIVAR2]], ptr [[TMP13]], align 8, !llvm.access.group !8 -// CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]), !llvm.access.group !8 +// CHECK5-NEXT: store ptr [[SIVAR2]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]), !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK5-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1480,21 +1480,21 @@ // CHECK5: .omp.final.done: // CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK5-NEXT: store ptr [[SIVAR2]], ptr [[TMP17]], align 8 -// CHECK5-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK5-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.reduction.case1: -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR2]], align 4 +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK5-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 // CHECK5-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.case2: -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK5-NEXT: [[TMP24:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP23]] monotonic, align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR2]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP21]] monotonic, align 4 // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.default: // CHECK5-NEXT: ret void @@ -1508,15 +1508,15 @@ // CHECK5-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK5-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK5-NEXT: ret void // // @@ -1528,15 +1528,15 @@ // CHECK5-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK5-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK5-NEXT: ret void // // @@ -1565,26 +1565,26 @@ // CHECK7-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !2 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2:![0-9]+]] +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !2 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !2 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group !2 +// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], [[TMP4]] -// CHECK7-NEXT: store i32 [[ADD1]], ptr [[SIVAR]], align 4, !llvm.access.group !2 +// CHECK7-NEXT: store i32 [[ADD1]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1 -// CHECK7-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK7-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 2, ptr [[I]], align 4 @@ -1611,37 +1611,37 @@ // CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: store i32 0, ptr [[T_VAR1]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !6 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]] +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !6 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !6 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group !6 -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]] -// CHECK7-NEXT: store i32 [[ADD2]], ptr [[T_VAR1]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP5]], [[TMP4]] +// CHECK7-NEXT: store i32 [[ADD2]], ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK7-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK7-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 2, ptr [[I]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP7]], [[TMP8]] // CHECK7-NEXT: store i32 [[ADD4]], ptr [[T_VAR]], align 4 // CHECK7-NEXT: ret i32 0 // @@ -1664,26 +1664,26 @@ // CHECK9-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !3 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK9-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !3 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !3 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group !3 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK9-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], [[TMP4]] -// CHECK9-NEXT: store i32 [[ADD1]], ptr [[SIVAR]], align 4, !llvm.access.group !3 +// CHECK9-NEXT: store i32 [[ADD1]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1 -// CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 @@ -1710,37 +1710,37 @@ // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: store i32 0, ptr [[T_VAR1]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !7 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]] +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK9-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !7 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !7 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group !7 -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]] -// CHECK9-NEXT: store i32 [[ADD2]], ptr [[T_VAR1]], align 4, !llvm.access.group !7 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP5]], [[TMP4]] +// CHECK9-NEXT: store i32 [[ADD2]], ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: store i32 [[ADD4]], ptr [[T_VAR]], align 4 // CHECK9-NEXT: ret i32 0 // Index: clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp =================================================================== --- clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp +++ clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp @@ -89,34 +89,34 @@ // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 2, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 2, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60(ptr @_ZZ4mainE5sivar) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -204,21 +204,21 @@ // CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[SIVAR1]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 // CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 // CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: @@ -233,15 +233,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -256,36 +256,36 @@ // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.3, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 2, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.3, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 2, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(ptr [[T_VAR]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -372,21 +372,21 @@ // CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 // CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 // CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: @@ -401,15 +401,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -431,34 +431,34 @@ // CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 2, ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 2, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60(ptr @_ZZ4mainE5sivar) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -546,21 +546,21 @@ // CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 // CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 // CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: @@ -575,15 +575,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -598,36 +598,36 @@ // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.3, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 2, ptr [[TMP16]], align 8 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.3, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 2, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(ptr [[T_VAR]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -714,21 +714,21 @@ // CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 // CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 // CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: @@ -743,15 +743,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -855,21 +855,21 @@ // CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: switch i32 [[TMP14]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 // CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.case2: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP19]] monotonic, align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP17]] monotonic, align 4 // CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.default: @@ -884,15 +884,15 @@ // CHECK9-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK9-NEXT: ret void // // Index: clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp =================================================================== --- clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp +++ clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp @@ -116,34 +116,34 @@ // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[A]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 56088, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 56088, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -151,8 +151,8 @@ // CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A3]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 -// CHECK1-NEXT: ret i32 [[TMP18]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 +// CHECK1-NEXT: ret i32 [[TMP16]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28 @@ -206,39 +206,39 @@ // CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 456 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP12]] to i64 // CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]] -// CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -284,34 +284,34 @@ // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[A]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 56088, ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 56088, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR3:[0-9]+]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -319,8 +319,8 @@ // CHECK3-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A3]], i32 0, i32 0 // CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 -// CHECK3-NEXT: ret i32 [[TMP18]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 +// CHECK3-NEXT: ret i32 [[TMP16]] // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28 @@ -374,37 +374,37 @@ // CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 456 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] // CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK3-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP12]] -// CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -455,39 +455,39 @@ // CHECK5-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2:![0-9]+]] +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP3]], 456 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP5]], 456 // CHECK5-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 // CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], [[MUL4]] // CHECK5-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK5-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP6]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP7]] to i64 // CHECK5-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]] -// CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK5-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 @@ -526,37 +526,37 @@ // CHECK7-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP3]], 456 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !3 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP5]], 456 // CHECK7-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 // CHECK7-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], [[MUL4]] // CHECK7-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK7-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP6]] -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP7]] -// CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK7-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 123, ptr [[I]], align 4 @@ -586,10 +586,10 @@ // CHECK9-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8 // CHECK9-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK9-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK9-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 @@ -614,91 +614,91 @@ // CHECK9-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP1]], [[TMP3]] // CHECK9-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes, i64 40, i1 false) -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 // CHECK9-NEXT: store ptr null, ptr [[TMP17]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP18]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP20]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK9-NEXT: store ptr null, ptr [[TMP22]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP23]], align 8 -// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP25]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP18]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP19]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK9-NEXT: store ptr null, ptr [[TMP20]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP21]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP22]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK9-NEXT: store ptr null, ptr [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 // CHECK9-NEXT: store ptr null, ptr [[TMP27]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP28]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP30]], align 8 -// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 -// CHECK9-NEXT: store ptr null, ptr [[TMP32]], align 8 -// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP33]], align 8 -// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP35]], align 8 -// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 -// CHECK9-NEXT: store i64 [[TMP11]], ptr [[TMP37]], align 8 -// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 -// CHECK9-NEXT: store ptr null, ptr [[TMP38]], align 8 -// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[N]], align 4 -// CHECK9-NEXT: store i32 [[TMP42]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[M]], align 4 -// CHECK9-NEXT: store i32 [[TMP43]], ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP32]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[CONV5:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP45]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV5]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_4]], align 8 -// CHECK9-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP46]], 1 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP35]], 1 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP47]], align 4 -// CHECK9-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 5, ptr [[TMP48]], align 4 -// CHECK9-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[TMP39]], ptr [[TMP49]], align 8 -// CHECK9-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK9-NEXT: store ptr [[TMP40]], ptr [[TMP50]], align 8 -// CHECK9-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr [[TMP41]], ptr [[TMP51]], align 8 -// CHECK9-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP52]], align 8 -// CHECK9-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK9-NEXT: store ptr null, ptr [[TMP53]], align 8 -// CHECK9-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP54]], align 8 -// CHECK9-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 [[ADD]], ptr [[TMP55]], align 8 -// CHECK9-NEXT: [[TMP56:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP57:%.*]] = icmp ne i32 [[TMP56]], 0 -// CHECK9-NEXT: br i1 [[TMP57]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP36]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 5, ptr [[TMP37]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP28]], ptr [[TMP38]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP29]], ptr [[TMP39]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP30]], ptr [[TMP40]], align 8 +// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP41]], align 8 +// CHECK9-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: store ptr null, ptr [[TMP42]], align 8 +// CHECK9-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK9-NEXT: store ptr null, ptr [[TMP43]], align 8 +// CHECK9-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 [[ADD]], ptr [[TMP44]], align 8 +// CHECK9-NEXT: [[TMP45:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK9-NEXT: br i1 [[TMP46]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80(i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP1]], i64 [[TMP3]], ptr [[VLA]]) #[[ATTR4:[0-9]+]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: -// CHECK9-NEXT: [[TMP58:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP58]]) +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP47]]) // CHECK9-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 -// CHECK9-NEXT: [[TMP59:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK9-NEXT: call void @llvm.stackrestore(ptr [[TMP59]]) -// CHECK9-NEXT: [[TMP60:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: ret i32 [[TMP60]] +// CHECK9-NEXT: [[TMP48:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK9-NEXT: call void @llvm.stackrestore(ptr [[TMP48]]) +// CHECK9-NEXT: [[TMP49:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK9-NEXT: ret i32 [[TMP49]] // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80 @@ -741,18 +741,18 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I13:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J14:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 @@ -766,30 +766,30 @@ // CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_4]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 -// CHECK9-NEXT: [[CONV10:%.*]] = sext i32 [[DIV9]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV7]], [[CONV10]] -// CHECK9-NEXT: [[SUB11:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB11]], ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] +// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 // CHECK9-NEXT: store i32 0, ptr [[J]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[CMP12:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 // CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 @@ -797,11 +797,11 @@ // CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) // CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK9-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: // CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 @@ -813,56 +813,56 @@ // CHECK9-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] -// CHECK9-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP20]], 0 -// CHECK9-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 -// CHECK9-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] -// CHECK9-NEXT: [[CONV20:%.*]] = sext i32 [[MUL19]] to i64 -// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i64 [[TMP19]], [[CONV20]] -// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]] -// CHECK9-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK9-NEXT: store i32 [[CONV23]], ptr [[I13]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP23]], 0 -// CHECK9-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 -// CHECK9-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] -// CHECK9-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 -// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP22]], [[CONV27]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK9-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 -// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] -// CHECK9-NEXT: [[CONV32:%.*]] = sext i32 [[MUL31]] to i64 -// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[DIV28]], [[CONV32]] -// CHECK9-NEXT: [[SUB34:%.*]] = sub nsw i64 [[TMP21]], [[MUL33]] -// CHECK9-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 -// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] -// CHECK9-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 -// CHECK9-NEXT: store i32 [[CONV37]], ptr [[J14]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[I13]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP20]], 0 +// CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK9-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] +// CHECK9-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 +// CHECK9-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP19]], [[CONV18]] +// CHECK9-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] +// CHECK9-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK9-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK9-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 +// CHECK9-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] +// CHECK9-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 +// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP22]], [[CONV25]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 +// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] +// CHECK9-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 +// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] +// CHECK9-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP21]], [[MUL31]] +// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 +// CHECK9-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] +// CHECK9-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 +// CHECK9-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 // CHECK9-NEXT: [[TMP26:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP1]] // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP26]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[J14]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM38]] -// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX39]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM36]] +// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX37]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[ADD40:%.*]] = add nsw i64 [[TMP28]], 1 -// CHECK9-NEXT: store i64 [[ADD40]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP28]], 1 +// CHECK9-NEXT: store i64 [[ADD38]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -875,17 +875,17 @@ // CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB41:%.*]] = sub nsw i32 [[TMP33]], 0 -// CHECK9-NEXT: [[DIV42:%.*]] = sdiv i32 [[SUB41]], 1 -// CHECK9-NEXT: [[MUL43:%.*]] = mul nsw i32 [[DIV42]], 1 -// CHECK9-NEXT: [[ADD44:%.*]] = add nsw i32 0, [[MUL43]] -// CHECK9-NEXT: store i32 [[ADD44]], ptr [[I13]], align 4 -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB45:%.*]] = sub nsw i32 [[TMP34]], 0 -// CHECK9-NEXT: [[DIV46:%.*]] = sdiv i32 [[SUB45]], 1 -// CHECK9-NEXT: [[MUL47:%.*]] = mul nsw i32 [[DIV46]], 1 -// CHECK9-NEXT: [[ADD48:%.*]] = add nsw i32 0, [[MUL47]] -// CHECK9-NEXT: store i32 [[ADD48]], ptr [[J14]], align 4 +// CHECK9-NEXT: [[SUB39:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK9-NEXT: [[DIV40:%.*]] = sdiv i32 [[SUB39]], 1 +// CHECK9-NEXT: [[MUL41:%.*]] = mul nsw i32 [[DIV40]], 1 +// CHECK9-NEXT: [[ADD42:%.*]] = add nsw i32 0, [[MUL41]] +// CHECK9-NEXT: store i32 [[ADD42]], ptr [[I11]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK9-NEXT: [[SUB43:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK9-NEXT: [[DIV44:%.*]] = sdiv i32 [[SUB43]], 1 +// CHECK9-NEXT: [[MUL45:%.*]] = mul nsw i32 [[DIV44]], 1 +// CHECK9-NEXT: [[ADD46:%.*]] = add nsw i32 0, [[MUL45]] +// CHECK9-NEXT: store i32 [[ADD46]], ptr [[J12]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -906,34 +906,34 @@ // CHECK9-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: store ptr [[A]], ptr [[TMP0]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[A]], ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK9-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[A]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr @.offload_sizes.2, ptr [[TMP11]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK9-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP12]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK9-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 20, ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr @.offload_sizes.2, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK9-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 20, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK9-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67(ptr [[A]]) #[[ATTR4]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -992,38 +992,38 @@ // CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 2 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK9-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] // CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK9-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP12]] to i64 // CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]] -// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1092,91 +1092,91 @@ // CHECK11-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4 // CHECK11-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes, i32 40, i1 false) -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP12]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 // CHECK11-NEXT: store ptr null, ptr [[TMP16]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP17]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP19]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK11-NEXT: store ptr null, ptr [[TMP21]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP22]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP24]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP18]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr null, ptr [[TMP19]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP20]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP21]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr null, ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK11-NEXT: store i64 [[TMP10]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 // CHECK11-NEXT: store ptr null, ptr [[TMP26]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP27]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP29]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr null, ptr [[TMP31]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP34]], align 4 -// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 -// CHECK11-NEXT: store i64 [[TMP10]], ptr [[TMP36]], align 4 -// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr null, ptr [[TMP37]], align 4 -// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[N]], align 4 -// CHECK11-NEXT: store i32 [[TMP41]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[M]], align 4 -// CHECK11-NEXT: store i32 [[TMP42]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP43]], 0 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP30]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 // CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] // CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 // CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK11-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP45]], 1 +// CHECK11-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP34]], 1 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP46]], align 4 -// CHECK11-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 5, ptr [[TMP47]], align 4 -// CHECK11-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[TMP38]], ptr [[TMP48]], align 4 -// CHECK11-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr [[TMP39]], ptr [[TMP49]], align 4 -// CHECK11-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr [[TMP40]], ptr [[TMP50]], align 4 -// CHECK11-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP51]], align 4 -// CHECK11-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK11-NEXT: store ptr null, ptr [[TMP52]], align 4 -// CHECK11-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP53]], align 4 -// CHECK11-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 [[ADD]], ptr [[TMP54]], align 8 -// CHECK11-NEXT: [[TMP55:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP56:%.*]] = icmp ne i32 [[TMP55]], 0 -// CHECK11-NEXT: br i1 [[TMP56]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 1, ptr [[TMP35]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 5, ptr [[TMP36]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP27]], ptr [[TMP37]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP28]], ptr [[TMP38]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP29]], ptr [[TMP39]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP40]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: store ptr null, ptr [[TMP41]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK11-NEXT: store ptr null, ptr [[TMP42]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 [[ADD]], ptr [[TMP43]], align 8 +// CHECK11-NEXT: [[TMP44:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK11-NEXT: br i1 [[TMP45]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80(i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP0]], i32 [[TMP1]], ptr [[VLA]]) #[[ATTR4:[0-9]+]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: -// CHECK11-NEXT: [[TMP57:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP57]]) +// CHECK11-NEXT: [[TMP46:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP46]]) // CHECK11-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 -// CHECK11-NEXT: [[TMP58:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 -// CHECK11-NEXT: call void @llvm.stackrestore(ptr [[TMP58]]) -// CHECK11-NEXT: [[TMP59:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: ret i32 [[TMP59]] +// CHECK11-NEXT: [[TMP47:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 +// CHECK11-NEXT: call void @llvm.stackrestore(ptr [[TMP47]]) +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK11-NEXT: ret i32 [[TMP48]] // // // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80 @@ -1291,13 +1291,13 @@ // CHECK11-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] // CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP20]], 0 // CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK11-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] @@ -1306,16 +1306,16 @@ // CHECK11-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] // CHECK11-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK11-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP23]], 0 // CHECK11-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 // CHECK11-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] // CHECK11-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 // CHECK11-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP22]], [[CONV25]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP24]], 0 // CHECK11-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 // CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] @@ -1325,20 +1325,20 @@ // CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 // CHECK11-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] // CHECK11-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 -// CHECK11-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[I11]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: [[TMP26:%.*]] = mul nsw i32 [[TMP25]], [[TMP1]] // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[J12]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP27]] -// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX36]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX36]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: [[ADD37:%.*]] = add nsw i64 [[TMP28]], 1 -// CHECK11-NEXT: store i64 [[ADD37]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: store i64 [[ADD37]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1382,34 +1382,34 @@ // CHECK11-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: store ptr [[A]], ptr [[TMP0]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: store ptr [[A]], ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK11-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[A]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr @.offload_sizes.2, ptr [[TMP11]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK11-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP12]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 20, ptr [[TMP15]], align 8 -// CHECK11-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK11-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr @.offload_sizes.2, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK11-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 20, ptr [[TMP13]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK11-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67(ptr [[A]]) #[[ATTR4]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1468,36 +1468,36 @@ // CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 2 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK11-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 // CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] // CHECK11-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK11-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP12]] -// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1591,13 +1591,13 @@ // CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_IV]], align 8 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2:![0-9]+]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[CMP11:%.*]] = icmp sle i64 [[TMP14]], [[TMP15]] // CHECK13-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP17]], 0 // CHECK13-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK13-NEXT: [[MUL14:%.*]] = mul nsw i32 1, [[DIV13]] @@ -1606,16 +1606,16 @@ // CHECK13-NEXT: [[MUL17:%.*]] = mul nsw i64 [[DIV16]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL17]] // CHECK13-NEXT: [[CONV18:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK13-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP20]], 0 // CHECK13-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 // CHECK13-NEXT: [[MUL21:%.*]] = mul nsw i32 1, [[DIV20]] // CHECK13-NEXT: [[CONV22:%.*]] = sext i32 [[MUL21]] to i64 // CHECK13-NEXT: [[DIV23:%.*]] = sdiv i64 [[TMP19]], [[CONV22]] -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP21]], 0 // CHECK13-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 // CHECK13-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] @@ -1625,22 +1625,22 @@ // CHECK13-NEXT: [[MUL30:%.*]] = mul nsw i64 [[SUB29]], 1 // CHECK13-NEXT: [[ADD31:%.*]] = add nsw i64 0, [[MUL30]] // CHECK13-NEXT: [[CONV32:%.*]] = trunc i64 [[ADD31]] to i32 -// CHECK13-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 // CHECK13-NEXT: [[TMP23:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP3]] // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP23]] -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[IDXPROM33:%.*]] = sext i32 [[TMP24]] to i64 // CHECK13-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM33]] -// CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX34]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX34]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[ADD35:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK13-NEXT: store i64 [[ADD35]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2 +// CHECK13-NEXT: store i64 [[ADD35]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 @@ -1685,38 +1685,38 @@ // CHECK13-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP3]], 2 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP5]], 2 // CHECK13-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 2 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], [[MUL3]] // CHECK13-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK13-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] -// CHECK13-NEXT: store i32 [[ADD5]], ptr [[J]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: store i32 [[ADD5]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP6]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP7]] to i64 // CHECK13-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM6]] -// CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 @@ -1792,13 +1792,13 @@ // CHECK15-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3:![0-9]+]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[CMP11:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK15-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP15]], 0 // CHECK15-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK15-NEXT: [[MUL14:%.*]] = mul nsw i32 1, [[DIV13]] @@ -1807,16 +1807,16 @@ // CHECK15-NEXT: [[MUL17:%.*]] = mul nsw i64 [[DIV16]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL17]] // CHECK15-NEXT: [[CONV18:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK15-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP18]], 0 // CHECK15-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 // CHECK15-NEXT: [[MUL21:%.*]] = mul nsw i32 1, [[DIV20]] // CHECK15-NEXT: [[CONV22:%.*]] = sext i32 [[MUL21]] to i64 // CHECK15-NEXT: [[DIV23:%.*]] = sdiv i64 [[TMP17]], [[CONV22]] -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP19]], 0 // CHECK15-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 // CHECK15-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] @@ -1826,20 +1826,20 @@ // CHECK15-NEXT: [[MUL30:%.*]] = mul nsw i64 [[SUB29]], 1 // CHECK15-NEXT: [[ADD31:%.*]] = add nsw i64 0, [[MUL30]] // CHECK15-NEXT: [[CONV32:%.*]] = trunc i64 [[ADD31]] to i32 -// CHECK15-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[TMP21:%.*]] = mul nsw i32 [[TMP20]], [[TMP1]] // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i32 [[TMP21]] -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP22]] -// CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX33]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX33]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[ADD34:%.*]] = add nsw i64 [[TMP23]], 1 -// CHECK15-NEXT: store i64 [[ADD34]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3 +// CHECK15-NEXT: store i64 [[ADD34]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 @@ -1884,36 +1884,36 @@ // CHECK15-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP3]], 2 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP5]], 2 // CHECK15-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 2 // CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], [[MUL3]] // CHECK15-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] -// CHECK15-NEXT: store i32 [[ADD5]], ptr [[J]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 [[ADD5]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[A]], i32 0, i32 [[TMP6]] -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP7]] -// CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX6]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK15-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 10, ptr [[I]], align 4 Index: clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp =================================================================== --- clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp +++ clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp @@ -89,34 +89,34 @@ // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 2, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 2, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60(ptr @_ZZ4mainE5sivar) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -177,26 +177,26 @@ // CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -211,21 +211,21 @@ // CHECK1: .omp.final.done: // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 // CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 // CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: @@ -240,15 +240,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -263,36 +263,36 @@ // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.3, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 2, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.3, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 2, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(ptr [[T_VAR]]) #[[ATTR3]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -352,26 +352,26 @@ // CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -386,21 +386,21 @@ // CHECK1: .omp.final.done: // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 // CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 // CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: @@ -415,15 +415,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -445,34 +445,34 @@ // CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 2, ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 2, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60(ptr @_ZZ4mainE5sivar) #[[ATTR3:[0-9]+]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -533,26 +533,26 @@ // CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -567,21 +567,21 @@ // CHECK3: .omp.final.done: // CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 // CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 // CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: @@ -596,15 +596,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -619,36 +619,36 @@ // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.3, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 2, ptr [[TMP16]], align 8 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.3, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 2, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(ptr [[T_VAR]]) #[[ATTR3]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -708,26 +708,26 @@ // CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -742,21 +742,21 @@ // CHECK3: .omp.final.done: // CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 // CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 // CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: @@ -771,15 +771,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -808,26 +808,26 @@ // CHECK5-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2:![0-9]+]] +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], [[TMP4]] -// CHECK5-NEXT: store i32 [[ADD1]], ptr [[SIVAR]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD1]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1 -// CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 2, ptr [[I]], align 4 @@ -854,37 +854,37 @@ // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: store i32 0, ptr [[T_VAR1]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]] +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]] -// CHECK5-NEXT: store i32 [[ADD2]], ptr [[T_VAR1]], align 4, !llvm.access.group !6 +// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP5]], [[TMP4]] +// CHECK5-NEXT: store i32 [[ADD2]], ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 2, ptr [[I]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP7]], [[TMP8]] // CHECK5-NEXT: store i32 [[ADD4]], ptr [[T_VAR]], align 4 // CHECK5-NEXT: ret i32 0 // @@ -907,26 +907,26 @@ // CHECK7-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !3 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !3 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], [[TMP4]] -// CHECK7-NEXT: store i32 [[ADD1]], ptr [[SIVAR]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 [[ADD1]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1 -// CHECK7-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 2, ptr [[I]], align 4 @@ -953,37 +953,37 @@ // CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: store i32 0, ptr [[T_VAR1]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]] +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]] -// CHECK7-NEXT: store i32 [[ADD2]], ptr [[T_VAR1]], align 4, !llvm.access.group !7 +// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP5]], [[TMP4]] +// CHECK7-NEXT: store i32 [[ADD2]], ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK7-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK7-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 2, ptr [[I]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP7]], [[TMP8]] // CHECK7-NEXT: store i32 [[ADD4]], ptr [[T_VAR]], align 4 // CHECK7-NEXT: ret i32 0 // @@ -1051,29 +1051,29 @@ // CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK9-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP11]], align 8, !llvm.access.group !4 -// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]), !llvm.access.group !4 +// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]), !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1088,21 +1088,21 @@ // CHECK9: .omp.final.done: // CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 // CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.case2: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP21]] monotonic, align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP19]] monotonic, align 4 // CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.default: @@ -1117,15 +1117,15 @@ // CHECK9-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK9-NEXT: ret void // // Index: clang/test/OpenMP/target_teams_map_codegen.cpp =================================================================== --- clang/test/OpenMP/target_teams_map_codegen.cpp +++ clang/test/OpenMP/target_teams_map_codegen.cpp @@ -86,40 +86,40 @@ // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[X]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[X]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[X]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP12]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 2, ptr [[TMP13]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14mapWithPrivatev_l27.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14mapWithPrivatev_l27.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14mapWithPrivatev_l27() #[[ATTR2:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -156,40 +156,40 @@ // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[X]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[X]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[X]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP12]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 2, ptr [[TMP13]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.2, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z19mapWithFirstprivatev_l33.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.2, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z19mapWithFirstprivatev_l33.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z19mapWithFirstprivatev_l33(ptr [[X]], ptr [[Y]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -242,40 +242,40 @@ // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[X]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[X]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[X]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP12]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 2, ptr [[TMP13]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.5, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16mapWithReductionv_l39.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.5, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16mapWithReductionv_l39.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16mapWithReductionv_l39(ptr [[X]], ptr [[Y]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -316,32 +316,32 @@ // CHECK1-NEXT: store i32 0, ptr [[Y2]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[X1]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[Y2]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[Y2]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[X1]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[X1]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[Y2]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[Y2]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] // CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[X1]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP14]] monotonic, align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[Y2]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP16]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[X1]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP11]] monotonic, align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[Y2]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP13]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -355,23 +355,23 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: store i32 [[ADD2]], ptr [[TMP16]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[TMP11]], align 4 // CHECK1-NEXT: ret void // // @@ -384,34 +384,34 @@ // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[X]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[X]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[X]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.8, ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.9, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7mapFromv_l45.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.8, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.9, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7mapFromv_l45.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7mapFromv_l45(ptr [[X]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -454,34 +454,34 @@ // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[X]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[X]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[X]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.11, ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5mapTov_l51.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.11, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5mapTov_l51.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5mapTov_l51(ptr [[X]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -524,34 +524,34 @@ // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[X]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[X]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[X]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.14, ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.15, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapAllocv_l57.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.14, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.15, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapAllocv_l57.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapAllocv_l57(ptr [[X]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -599,92 +599,92 @@ // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[Y]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[X]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[X]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[X]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[X]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP17]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 3, ptr [[TMP18]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.18, ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.19, ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l63.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK1-NEXT: br i1 [[TMP27]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 3, ptr [[TMP12]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.18, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.19, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l63.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l63(ptr [[Y]], ptr [[Z]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP35]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP37]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[X]], ptr [[TMP38]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[X]], ptr [[TMP40]], align 8 -// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 2 -// CHECK1-NEXT: store ptr null, ptr [[TMP42]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[X]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[X]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP45]], align 4 -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 -// CHECK1-NEXT: store i32 3, ptr [[TMP46]], align 4 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP43]], ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP44]], ptr [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.22, ptr [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.23, ptr [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP51]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP52]], align 8 -// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP53]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l65.region_id, ptr [[KERNEL_ARGS4]]) -// CHECK1-NEXT: [[TMP55:%.*]] = icmp ne i32 [[TMP54]], 0 -// CHECK1-NEXT: br i1 [[TMP55]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP33]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 +// CHECK1-NEXT: store i32 3, ptr [[TMP34]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP31]], ptr [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP32]], ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.22, ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.23, ptr [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP40]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP41]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l65.region_id, ptr [[KERNEL_ARGS4]]) +// CHECK1-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 +// CHECK1-NEXT: br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] // CHECK1: omp_offload.failed5: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l65(ptr [[Y]], ptr [[Z]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT6]] @@ -724,58 +724,58 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[Y1]], ptr align 4 [[TMP0]], i64 352, i1 false) // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 99 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]] +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 99 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP2]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[Z2]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.17, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP10]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP5]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.17, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP11]] +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP7]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP7]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done6: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP14]] +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP10]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]] // CHECK1: omp.arraycpy.body8: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP15]] monotonic, align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP11]] monotonic, align 4 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP10]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]] // CHECK1: omp.arraycpy.done14: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -789,24 +789,24 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i64 99 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 99 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done2: // CHECK1-NEXT: ret void @@ -844,58 +844,58 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[Y1]], ptr align 4 [[TMP0]], i64 352, i1 false) // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 99 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]] +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 99 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP2]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[Z2]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.21, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP10]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP5]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.21, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP11]] +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP7]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP7]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done6: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP14]] +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP10]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]] // CHECK1: omp.arraycpy.body8: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP15]] monotonic, align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP11]] monotonic, align 4 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP10]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]] // CHECK1: omp.arraycpy.done14: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -909,24 +909,24 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i64 99 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 99 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done2: // CHECK1-NEXT: ret void @@ -946,92 +946,92 @@ // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[Y]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[X]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[X]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[X]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[X]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP17]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 3, ptr [[TMP18]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.26, ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.27, ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9mapInt128v_l72.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK1-NEXT: br i1 [[TMP27]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 3, ptr [[TMP12]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.26, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.27, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9mapInt128v_l72.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9mapInt128v_l72(ptr [[Y]], ptr [[Z]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP35]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP37]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[X]], ptr [[TMP38]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[X]], ptr [[TMP40]], align 8 -// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 2 -// CHECK1-NEXT: store ptr null, ptr [[TMP42]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[X]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[X]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP45]], align 4 -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 -// CHECK1-NEXT: store i32 3, ptr [[TMP46]], align 4 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP43]], ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP44]], ptr [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.30, ptr [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.31, ptr [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP51]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP52]], align 8 -// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP53]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9mapInt128v_l74.region_id, ptr [[KERNEL_ARGS4]]) -// CHECK1-NEXT: [[TMP55:%.*]] = icmp ne i32 [[TMP54]], 0 -// CHECK1-NEXT: br i1 [[TMP55]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP33]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 +// CHECK1-NEXT: store i32 3, ptr [[TMP34]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP31]], ptr [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP32]], ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.30, ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.31, ptr [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP40]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP41]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9mapInt128v_l74.region_id, ptr [[KERNEL_ARGS4]]) +// CHECK1-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 +// CHECK1-NEXT: br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] // CHECK1: omp_offload.failed5: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9mapInt128v_l74(ptr [[Y]], ptr [[Z]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT6]] @@ -1077,35 +1077,35 @@ // CHECK1-NEXT: store i128 0, ptr [[Z2]], align 16 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.25, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP8]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP5]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.25, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP9:%.*]] = load i128, ptr [[TMP1]], align 16 -// CHECK1-NEXT: [[TMP10:%.*]] = load i128, ptr [[Z2]], align 16 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP1]], align 16 +// CHECK1-NEXT: [[TMP8:%.*]] = load i128, ptr [[Z2]], align 16 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP7]], [[TMP8]] // CHECK1-NEXT: store i128 [[ADD]], ptr [[TMP1]], align 16 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP11:%.*]] = load i128, ptr [[Z2]], align 16 +// CHECK1-NEXT: [[TMP9:%.*]] = load i128, ptr [[Z2]], align 16 // CHECK1-NEXT: call void @__atomic_load(i64 noundef 16, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP]], i32 noundef signext 0) // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP14:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16 -// CHECK1-NEXT: store i128 [[TMP14]], ptr [[TMP]], align 16 -// CHECK1-NEXT: [[TMP15:%.*]] = load i128, ptr [[TMP]], align 16 -// CHECK1-NEXT: [[TMP16:%.*]] = load i128, ptr [[Z2]], align 16 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i128 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16 +// CHECK1-NEXT: store i128 [[TMP10]], ptr [[TMP]], align 16 +// CHECK1-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP]], align 16 +// CHECK1-NEXT: [[TMP12:%.*]] = load i128, ptr [[Z2]], align 16 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i128 [[TMP11]], [[TMP12]] // CHECK1-NEXT: store i128 [[ADD4]], ptr [[ATOMIC_TEMP3]], align 16 // CHECK1-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP3]], i32 noundef signext 0, i32 noundef signext 0) // CHECK1-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -1119,15 +1119,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 16 -// CHECK1-NEXT: [[TMP13:%.*]] = load i128, ptr [[TMP7]], align 16 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i128 [[ADD]], ptr [[TMP10]], align 16 +// CHECK1-NEXT: [[TMP8:%.*]] = load i128, ptr [[TMP7]], align 16 +// CHECK1-NEXT: [[TMP9:%.*]] = load i128, ptr [[TMP5]], align 16 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i128 [[ADD]], ptr [[TMP7]], align 16 // CHECK1-NEXT: ret void // // @@ -1169,35 +1169,35 @@ // CHECK1-NEXT: store i128 0, ptr [[Z2]], align 16 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.29, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP8]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP5]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.29, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP9:%.*]] = load i128, ptr [[TMP1]], align 16 -// CHECK1-NEXT: [[TMP10:%.*]] = load i128, ptr [[Z2]], align 16 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP1]], align 16 +// CHECK1-NEXT: [[TMP8:%.*]] = load i128, ptr [[Z2]], align 16 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP7]], [[TMP8]] // CHECK1-NEXT: store i128 [[ADD]], ptr [[TMP1]], align 16 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP11:%.*]] = load i128, ptr [[Z2]], align 16 +// CHECK1-NEXT: [[TMP9:%.*]] = load i128, ptr [[Z2]], align 16 // CHECK1-NEXT: call void @__atomic_load(i64 noundef 16, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP]], i32 noundef signext 0) // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP14:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16 -// CHECK1-NEXT: store i128 [[TMP14]], ptr [[TMP]], align 16 -// CHECK1-NEXT: [[TMP15:%.*]] = load i128, ptr [[TMP]], align 16 -// CHECK1-NEXT: [[TMP16:%.*]] = load i128, ptr [[Z2]], align 16 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i128 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16 +// CHECK1-NEXT: store i128 [[TMP10]], ptr [[TMP]], align 16 +// CHECK1-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP]], align 16 +// CHECK1-NEXT: [[TMP12:%.*]] = load i128, ptr [[Z2]], align 16 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i128 [[TMP11]], [[TMP12]] // CHECK1-NEXT: store i128 [[ADD4]], ptr [[ATOMIC_TEMP3]], align 16 // CHECK1-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP3]], i32 noundef signext 0, i32 noundef signext 0) // CHECK1-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -1211,15 +1211,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 16 -// CHECK1-NEXT: [[TMP13:%.*]] = load i128, ptr [[TMP7]], align 16 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i128 [[ADD]], ptr [[TMP10]], align 16 +// CHECK1-NEXT: [[TMP8:%.*]] = load i128, ptr [[TMP7]], align 16 +// CHECK1-NEXT: [[TMP9:%.*]] = load i128, ptr [[TMP5]], align 16 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i128 [[ADD]], ptr [[TMP7]], align 16 // CHECK1-NEXT: ret void // // @@ -1240,40 +1240,40 @@ // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[X]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[X]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr null, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[X]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr null, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 2, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP11]], ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP18]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 0, ptr [[TMP20]], align 8 -// CHECK3-NEXT: [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14mapWithPrivatev_l27.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, ptr [[TMP16]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14mapWithPrivatev_l27.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14mapWithPrivatev_l27() #[[ATTR2:[0-9]+]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1310,40 +1310,40 @@ // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[X]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[X]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr null, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[X]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr null, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 2, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP11]], ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.2, ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP18]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 0, ptr [[TMP20]], align 8 -// CHECK3-NEXT: [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z19mapWithFirstprivatev_l33.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.2, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, ptr [[TMP16]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z19mapWithFirstprivatev_l33.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z19mapWithFirstprivatev_l33(ptr [[X]], ptr [[Y]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1396,40 +1396,40 @@ // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[X]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[X]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr null, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[X]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr null, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 2, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP11]], ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.5, ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP18]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 0, ptr [[TMP20]], align 8 -// CHECK3-NEXT: [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16mapWithReductionv_l39.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.5, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, ptr [[TMP16]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16mapWithReductionv_l39.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16mapWithReductionv_l39(ptr [[X]], ptr [[Y]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1470,32 +1470,32 @@ // CHECK3-NEXT: store i32 0, ptr [[Y2]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[X1]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[Y2]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 2, i32 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[Y2]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 2, i32 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[X1]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[X1]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[Y2]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[Y2]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[X1]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP14]] monotonic, align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[Y2]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP16]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[X1]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP11]] monotonic, align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[Y2]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP13]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -1509,23 +1509,23 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK3-NEXT: store i32 [[ADD2]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[TMP11]], align 4 // CHECK3-NEXT: ret void // // @@ -1538,34 +1538,34 @@ // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[X]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[X]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[X]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.8, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.9, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 0, ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7mapFromv_l45.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.8, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.9, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7mapFromv_l45.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7mapFromv_l45(ptr [[X]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1608,34 +1608,34 @@ // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[X]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[X]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[X]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.11, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 0, ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5mapTov_l51.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.11, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5mapTov_l51.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5mapTov_l51(ptr [[X]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1678,34 +1678,34 @@ // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[X]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[X]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[X]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.14, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.15, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 0, ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapAllocv_l57.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.14, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.15, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapAllocv_l57.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapAllocv_l57(ptr [[X]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1753,92 +1753,92 @@ // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [3 x ptr], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[Y]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[Z]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[Z]], ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr null, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[X]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[X]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[Z]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[Z]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr null, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[X]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[X]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr null, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 3, ptr [[TMP18]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP15]], ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP16]], ptr [[TMP20]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.18, ptr [[TMP21]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.19, ptr [[TMP22]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP23]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP24]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 0, ptr [[TMP25]], align 8 -// CHECK3-NEXT: [[TMP26:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l63.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK3-NEXT: br i1 [[TMP27]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 3, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP9]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.18, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.19, ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP18]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, ptr [[TMP19]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l63.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK3-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l63(ptr [[Y]], ptr [[Z]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP30]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP32]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[Z]], ptr [[TMP33]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[Z]], ptr [[TMP35]], align 4 -// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i32 0, i32 1 -// CHECK3-NEXT: store ptr null, ptr [[TMP37]], align 4 -// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[X]], ptr [[TMP38]], align 4 -// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[X]], ptr [[TMP40]], align 4 -// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i32 0, i32 2 -// CHECK3-NEXT: store ptr null, ptr [[TMP42]], align 4 -// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[Z]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[Z]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i32 0, i32 1 +// CHECK3-NEXT: store ptr null, ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[X]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[X]], ptr [[TMP29]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i32 0, i32 2 +// CHECK3-NEXT: store ptr null, ptr [[TMP30]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP45]], align 4 -// CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 -// CHECK3-NEXT: store i32 3, ptr [[TMP46]], align 4 -// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP43]], ptr [[TMP47]], align 4 -// CHECK3-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP44]], ptr [[TMP48]], align 4 -// CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.22, ptr [[TMP49]], align 4 -// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.23, ptr [[TMP50]], align 4 -// CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP51]], align 4 -// CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP52]], align 4 -// CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 -// CHECK3-NEXT: store i64 0, ptr [[TMP53]], align 8 -// CHECK3-NEXT: [[TMP54:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l65.region_id, ptr [[KERNEL_ARGS4]]) -// CHECK3-NEXT: [[TMP55:%.*]] = icmp ne i32 [[TMP54]], 0 -// CHECK3-NEXT: br i1 [[TMP55]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP33]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 +// CHECK3-NEXT: store i32 3, ptr [[TMP34]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP31]], ptr [[TMP35]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP32]], ptr [[TMP36]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.22, ptr [[TMP37]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.23, ptr [[TMP38]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP39]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP40]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, ptr [[TMP41]], align 8 +// CHECK3-NEXT: [[TMP42:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l65.region_id, ptr [[KERNEL_ARGS4]]) +// CHECK3-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 +// CHECK3-NEXT: br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] // CHECK3: omp_offload.failed5: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l65(ptr [[Y]], ptr [[Z]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT6]] @@ -1878,58 +1878,58 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 4 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Y1]], ptr align 4 [[TMP0]], i32 352, i1 false) // CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 99 -// CHECK3-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]] +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 99 +// CHECK3-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP2]] // CHECK3-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK3: omp.arrayinit.body: // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK3-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK3: omp.arrayinit.done: -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[Z2]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.17, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP10]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP5]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.17, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP11]] +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP7]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: // CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP11]] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP7]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] // CHECK3: omp.arraycpy.done6: -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP14]] +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP10]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]] // CHECK3: omp.arraycpy.body8: // CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP15]] monotonic, align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP11]] monotonic, align 4 // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP14]] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP10]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]] // CHECK3: omp.arraycpy.done14: -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -1943,24 +1943,24 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i32 99 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]] +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 99 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] // CHECK3: omp.arraycpy.done2: // CHECK3-NEXT: ret void @@ -1998,58 +1998,58 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 4 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Y1]], ptr align 4 [[TMP0]], i32 352, i1 false) // CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 99 -// CHECK3-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]] +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 99 +// CHECK3-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP2]] // CHECK3-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK3: omp.arrayinit.body: // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK3-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK3: omp.arrayinit.done: -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[Z2]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.21, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP10]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP5]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.21, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP11]] +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP7]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: // CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP11]] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP7]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] // CHECK3: omp.arraycpy.done6: -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP14]] +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP10]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]] // CHECK3: omp.arraycpy.body8: // CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP15]] monotonic, align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP11]] monotonic, align 4 // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP14]] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP10]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]] // CHECK3: omp.arraycpy.done14: -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -2063,24 +2063,24 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i32 99 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]] +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 99 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] // CHECK3: omp.arraycpy.done2: // CHECK3-NEXT: ret void @@ -2180,32 +2180,32 @@ // CHECK5-NEXT: store i32 0, ptr [[Y2]], align 4 // CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK5-NEXT: store ptr [[X1]], ptr [[TMP2]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK5-NEXT: store ptr [[Y2]], ptr [[TMP4]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK5-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK5-NEXT: store ptr [[Y2]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.reduction.case1: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[X1]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[X1]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], [[TMP8]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[Y2]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[Y2]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] // CHECK5-NEXT: store i32 [[ADD3]], ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.case2: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[X1]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP14]] monotonic, align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[Y2]], align 4 -// CHECK5-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP16]] monotonic, align 4 -// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[X1]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP11]] monotonic, align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[Y2]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP13]] monotonic, align 4 +// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.default: // CHECK5-NEXT: ret void @@ -2219,23 +2219,23 @@ // CHECK5-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK5-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1 -// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK5-NEXT: store i32 [[ADD2]], ptr [[TMP16]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK5-NEXT: store i32 [[ADD2]], ptr [[TMP11]], align 4 // CHECK5-NEXT: ret void // // @@ -2349,58 +2349,58 @@ // CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8 // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[Y1]], ptr align 4 [[TMP0]], i64 352, i1 false) // CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z2]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 99 -// CHECK5-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]] +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 99 +// CHECK5-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP2]] // CHECK5-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK5: omp.arrayinit.body: // CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK5-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]] // CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK5: omp.arrayinit.done: -// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK5-NEXT: store ptr [[Z2]], ptr [[TMP5]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.7, ptr @.gomp_critical_user_.reduction.var) -// CHECK5-NEXT: switch i32 [[TMP10]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK5-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP5]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.7, ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.reduction.case1: -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP11]] +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP7]] // CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: // CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1 // CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP11]] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP7]] // CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] // CHECK5: omp.arraycpy.done6: -// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.case2: -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP14]] +// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP10]] // CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]] // CHECK5: omp.arraycpy.body8: // CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ] // CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP15]] monotonic, align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP11]] monotonic, align 4 // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 // CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP14]] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP10]] // CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]] // CHECK5: omp.arraycpy.done14: -// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.default: // CHECK5-NEXT: ret void @@ -2414,24 +2414,24 @@ // CHECK5-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK5-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i64 99 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]] +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 99 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] // CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] // CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] // CHECK5: omp.arraycpy.done2: // CHECK5-NEXT: ret void @@ -2469,58 +2469,58 @@ // CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8 // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[Y1]], ptr align 4 [[TMP0]], i64 352, i1 false) // CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z2]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 99 -// CHECK5-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]] +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 99 +// CHECK5-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP2]] // CHECK5-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK5: omp.arrayinit.body: // CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK5-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]] // CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK5: omp.arrayinit.done: -// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK5-NEXT: store ptr [[Z2]], ptr [[TMP5]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.9, ptr @.gomp_critical_user_.reduction.var) -// CHECK5-NEXT: switch i32 [[TMP10]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK5-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP5]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.9, ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.reduction.case1: -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP11]] +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP7]] // CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: // CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1 // CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP11]] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP7]] // CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] // CHECK5: omp.arraycpy.done6: -// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.case2: -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP14]] +// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP10]] // CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]] // CHECK5: omp.arraycpy.body8: // CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ] // CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP15]] monotonic, align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP11]] monotonic, align 4 // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 // CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP14]] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP10]] // CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]] // CHECK5: omp.arraycpy.done14: -// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.default: // CHECK5-NEXT: ret void @@ -2534,24 +2534,24 @@ // CHECK5-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK5-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i64 99 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]] +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 99 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] // CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] // CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] // CHECK5: omp.arraycpy.done2: // CHECK5-NEXT: ret void @@ -2595,35 +2595,35 @@ // CHECK5-NEXT: store i128 0, ptr [[Z2]], align 16 // CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK5-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.11, ptr @.gomp_critical_user_.reduction.var) -// CHECK5-NEXT: switch i32 [[TMP8]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP5]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.11, ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.reduction.case1: -// CHECK5-NEXT: [[TMP9:%.*]] = load i128, ptr [[TMP1]], align 16 -// CHECK5-NEXT: [[TMP10:%.*]] = load i128, ptr [[Z2]], align 16 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP9]], [[TMP10]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP1]], align 16 +// CHECK5-NEXT: [[TMP8:%.*]] = load i128, ptr [[Z2]], align 16 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP7]], [[TMP8]] // CHECK5-NEXT: store i128 [[ADD]], ptr [[TMP1]], align 16 -// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.case2: -// CHECK5-NEXT: [[TMP11:%.*]] = load i128, ptr [[Z2]], align 16 +// CHECK5-NEXT: [[TMP9:%.*]] = load i128, ptr [[Z2]], align 16 // CHECK5-NEXT: call void @__atomic_load(i64 noundef 16, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP]], i32 noundef signext 0) #[[ATTR6:[0-9]+]] // CHECK5-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK5: atomic_cont: -// CHECK5-NEXT: [[TMP14:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16 -// CHECK5-NEXT: store i128 [[TMP14]], ptr [[TMP]], align 16 -// CHECK5-NEXT: [[TMP15:%.*]] = load i128, ptr [[TMP]], align 16 -// CHECK5-NEXT: [[TMP16:%.*]] = load i128, ptr [[Z2]], align 16 -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i128 [[TMP15]], [[TMP16]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16 +// CHECK5-NEXT: store i128 [[TMP10]], ptr [[TMP]], align 16 +// CHECK5-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP]], align 16 +// CHECK5-NEXT: [[TMP12:%.*]] = load i128, ptr [[Z2]], align 16 +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i128 [[TMP11]], [[TMP12]] // CHECK5-NEXT: store i128 [[ADD4]], ptr [[ATOMIC_TEMP3]], align 16 // CHECK5-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP3]], i32 noundef signext 0, i32 noundef signext 0) #[[ATTR6]] // CHECK5-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK5: atomic_exit: -// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.default: // CHECK5-NEXT: ret void @@ -2637,15 +2637,15 @@ // CHECK5-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK5-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK5-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 16 -// CHECK5-NEXT: [[TMP13:%.*]] = load i128, ptr [[TMP7]], align 16 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP12]], [[TMP13]] -// CHECK5-NEXT: store i128 [[ADD]], ptr [[TMP10]], align 16 +// CHECK5-NEXT: [[TMP8:%.*]] = load i128, ptr [[TMP7]], align 16 +// CHECK5-NEXT: [[TMP9:%.*]] = load i128, ptr [[TMP5]], align 16 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: store i128 [[ADD]], ptr [[TMP7]], align 16 // CHECK5-NEXT: ret void // // @@ -2687,35 +2687,35 @@ // CHECK5-NEXT: store i128 0, ptr [[Z2]], align 16 // CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK5-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.13, ptr @.gomp_critical_user_.reduction.var) -// CHECK5-NEXT: switch i32 [[TMP8]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP5]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.13, ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.reduction.case1: -// CHECK5-NEXT: [[TMP9:%.*]] = load i128, ptr [[TMP1]], align 16 -// CHECK5-NEXT: [[TMP10:%.*]] = load i128, ptr [[Z2]], align 16 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP9]], [[TMP10]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP1]], align 16 +// CHECK5-NEXT: [[TMP8:%.*]] = load i128, ptr [[Z2]], align 16 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP7]], [[TMP8]] // CHECK5-NEXT: store i128 [[ADD]], ptr [[TMP1]], align 16 -// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.case2: -// CHECK5-NEXT: [[TMP11:%.*]] = load i128, ptr [[Z2]], align 16 +// CHECK5-NEXT: [[TMP9:%.*]] = load i128, ptr [[Z2]], align 16 // CHECK5-NEXT: call void @__atomic_load(i64 noundef 16, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP]], i32 noundef signext 0) #[[ATTR6]] // CHECK5-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK5: atomic_cont: -// CHECK5-NEXT: [[TMP14:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16 -// CHECK5-NEXT: store i128 [[TMP14]], ptr [[TMP]], align 16 -// CHECK5-NEXT: [[TMP15:%.*]] = load i128, ptr [[TMP]], align 16 -// CHECK5-NEXT: [[TMP16:%.*]] = load i128, ptr [[Z2]], align 16 -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i128 [[TMP15]], [[TMP16]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16 +// CHECK5-NEXT: store i128 [[TMP10]], ptr [[TMP]], align 16 +// CHECK5-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP]], align 16 +// CHECK5-NEXT: [[TMP12:%.*]] = load i128, ptr [[Z2]], align 16 +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i128 [[TMP11]], [[TMP12]] // CHECK5-NEXT: store i128 [[ADD4]], ptr [[ATOMIC_TEMP3]], align 16 // CHECK5-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP3]], i32 noundef signext 0, i32 noundef signext 0) #[[ATTR6]] // CHECK5-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK5: atomic_exit: -// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.default: // CHECK5-NEXT: ret void @@ -2729,15 +2729,15 @@ // CHECK5-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK5-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK5-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 16 -// CHECK5-NEXT: [[TMP13:%.*]] = load i128, ptr [[TMP7]], align 16 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP12]], [[TMP13]] -// CHECK5-NEXT: store i128 [[ADD]], ptr [[TMP10]], align 16 +// CHECK5-NEXT: [[TMP8:%.*]] = load i128, ptr [[TMP7]], align 16 +// CHECK5-NEXT: [[TMP9:%.*]] = load i128, ptr [[TMP5]], align 16 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: store i128 [[ADD]], ptr [[TMP7]], align 16 // CHECK5-NEXT: ret void // // @@ -2828,32 +2828,32 @@ // CHECK7-NEXT: store i32 0, ptr [[Y2]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 // CHECK7-NEXT: store ptr [[X1]], ptr [[TMP2]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK7-NEXT: store ptr [[Y2]], ptr [[TMP4]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 2, i32 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK7-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[Y2]], ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 2, i32 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK7-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK7-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK7-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK7-NEXT: ] // CHECK7: .omp.reduction.case1: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[X1]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[X1]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], [[TMP8]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[Y2]], align 4 -// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[Y2]], align 4 +// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] // CHECK7-NEXT: store i32 [[ADD3]], ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var) +// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) // CHECK7-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK7: .omp.reduction.case2: -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[X1]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP14]] monotonic, align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[Y2]], align 4 -// CHECK7-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP16]] monotonic, align 4 -// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var) +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[X1]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP11]] monotonic, align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[Y2]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP13]] monotonic, align 4 +// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) // CHECK7-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK7: .omp.reduction.default: // CHECK7-NEXT: ret void @@ -2867,23 +2867,23 @@ // CHECK7-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK7-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 -// CHECK7-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i32 0, i32 1 -// CHECK7-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK7-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK7-NEXT: store i32 [[ADD2]], ptr [[TMP16]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK7-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK7-NEXT: store i32 [[ADD2]], ptr [[TMP11]], align 4 // CHECK7-NEXT: ret void // // @@ -2997,58 +2997,58 @@ // CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 4 // CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Y1]], ptr align 4 [[TMP0]], i32 352, i1 false) // CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z2]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 99 -// CHECK7-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]] +// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 99 +// CHECK7-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP2]] // CHECK7-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK7: omp.arrayinit.body: // CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK7-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]] // CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK7: omp.arrayinit.done: -// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK7-NEXT: store ptr [[Z2]], ptr [[TMP5]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.7, ptr @.gomp_critical_user_.reduction.var) -// CHECK7-NEXT: switch i32 [[TMP10]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP5]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.7, ptr @.gomp_critical_user_.reduction.var) +// CHECK7-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK7-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK7-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK7-NEXT: ] // CHECK7: .omp.reduction.case1: -// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP11]] +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP7]] // CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK7: omp.arraycpy.body: // CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 // CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1 // CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP11]] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP7]] // CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] // CHECK7: omp.arraycpy.done6: -// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) +// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) // CHECK7-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK7: .omp.reduction.case2: -// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP14]] +// CHECK7-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP10]] // CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]] // CHECK7: omp.arraycpy.body8: // CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ] // CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP15]] monotonic, align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP11]] monotonic, align 4 // CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 // CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP14]] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP10]] // CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]] // CHECK7: omp.arraycpy.done14: -// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) +// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) // CHECK7-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK7: .omp.reduction.default: // CHECK7-NEXT: ret void @@ -3062,24 +3062,24 @@ // CHECK7-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK7-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i32 99 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]] +// CHECK7-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 99 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] // CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK7: omp.arraycpy.body: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] // CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] // CHECK7: omp.arraycpy.done2: // CHECK7-NEXT: ret void @@ -3117,58 +3117,58 @@ // CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 4 // CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Y1]], ptr align 4 [[TMP0]], i32 352, i1 false) // CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z2]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 99 -// CHECK7-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]] +// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 99 +// CHECK7-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP2]] // CHECK7-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK7: omp.arrayinit.body: // CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK7-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]] // CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK7: omp.arrayinit.done: -// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK7-NEXT: store ptr [[Z2]], ptr [[TMP5]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.9, ptr @.gomp_critical_user_.reduction.var) -// CHECK7-NEXT: switch i32 [[TMP10]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP5]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.9, ptr @.gomp_critical_user_.reduction.var) +// CHECK7-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK7-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK7-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK7-NEXT: ] // CHECK7: .omp.reduction.case1: -// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP11]] +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP7]] // CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK7: omp.arraycpy.body: // CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 // CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1 // CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP11]] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP7]] // CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] // CHECK7: omp.arraycpy.done6: -// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) +// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) // CHECK7-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK7: .omp.reduction.case2: -// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP14]] +// CHECK7-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP10]] // CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]] // CHECK7: omp.arraycpy.body8: // CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ] // CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP15]] monotonic, align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP11]] monotonic, align 4 // CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 // CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP14]] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP10]] // CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]] // CHECK7: omp.arraycpy.done14: -// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) +// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) // CHECK7-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK7: .omp.reduction.default: // CHECK7-NEXT: ret void @@ -3182,24 +3182,24 @@ // CHECK7-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK7-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i32 99 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]] +// CHECK7-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 99 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] // CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK7: omp.arraycpy.body: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] // CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] // CHECK7: omp.arraycpy.done2: // CHECK7-NEXT: ret void Index: llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h =================================================================== --- llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -1447,17 +1447,13 @@ /// /// \param Loc The insert and source location description. /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not. - /// \param RequiresFullRuntime Indicate if a full device runtime is necessary. - InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD, - bool RequiresFullRuntime); + InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD); /// Create a runtime call for kmpc_target_deinit /// /// \param Loc The insert and source location description. /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not. - /// \param RequiresFullRuntime Indicate if a full device runtime is necessary. - void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD, - bool RequiresFullRuntime); + void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD); ///} Index: llvm/include/llvm/Frontend/OpenMP/OMPKinds.def =================================================================== --- llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -444,8 +444,8 @@ /* Int */ Int32, /* kmp_task_t */ VoidPtr) /// OpenMP Device runtime functions -__OMP_RTL(__kmpc_target_init, false, Int32, IdentPtr, Int8, Int1, Int1) -__OMP_RTL(__kmpc_target_deinit, false, Void, IdentPtr, Int8, Int1) +__OMP_RTL(__kmpc_target_init, false, Int32, IdentPtr, Int8, Int1) +__OMP_RTL(__kmpc_target_deinit, false, Void, IdentPtr, Int8) __OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr) __OMP_RTL(__kmpc_parallel_51, false, Void, IdentPtr, Int32, Int32, Int32, Int32, VoidPtr, VoidPtr, VoidPtrPtr, SizeTy) Index: llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp =================================================================== --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -3876,8 +3876,7 @@ } OpenMPIRBuilder::InsertPointTy -OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD, - bool RequiresFullRuntime) { +OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD) { if (!updateToLocation(Loc)) return Loc.IP; @@ -3889,14 +3888,12 @@ IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC); ConstantInt *UseGenericStateMachine = ConstantInt::getBool(Int32->getContext(), !IsSPMD); - ConstantInt *RequiresFullRuntimeVal = - ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime); Function *Fn = getOrCreateRuntimeFunctionPtr( omp::RuntimeFunction::OMPRTL___kmpc_target_init); CallInst *ThreadKind = Builder.CreateCall( - Fn, {Ident, IsSPMDVal, UseGenericStateMachine, RequiresFullRuntimeVal}); + Fn, {Ident, IsSPMDVal, UseGenericStateMachine}); Value *ExecUserCode = Builder.CreateICmpEQ( ThreadKind, ConstantInt::get(ThreadKind->getType(), -1), @@ -3930,8 +3927,7 @@ } void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc, - bool IsSPMD, - bool RequiresFullRuntime) { + bool IsSPMD) { if (!updateToLocation(Loc)) return; @@ -3941,13 +3937,11 @@ ConstantInt *IsSPMDVal = ConstantInt::getSigned( IntegerType::getInt8Ty(Int8->getContext()), IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC); - ConstantInt *RequiresFullRuntimeVal = - ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime); Function *Fn = getOrCreateRuntimeFunctionPtr( omp::RuntimeFunction::OMPRTL___kmpc_target_deinit); - Builder.CreateCall(Fn, {Ident, IsSPMDVal, RequiresFullRuntimeVal}); + Builder.CreateCall(Fn, {Ident, IsSPMDVal}); } void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes( Index: llvm/lib/Transforms/IPO/OpenMPOpt.cpp =================================================================== --- llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -1863,7 +1863,8 @@ if (!UV || UV->size() + (ReplVal != nullptr) < 2) return false; - LLVM_DEBUG( +F.dump(); + ( dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Name << (ReplVal ? " with an existing value\n" : "\n") << "\n"); @@ -3290,8 +3291,6 @@ constexpr const int InitModeArgNo = 1; constexpr const int DeinitModeArgNo = 1; constexpr const int InitUseStateMachineArgNo = 2; - constexpr const int InitRequiresFullRuntimeArgNo = 3; - constexpr const int DeinitRequiresFullRuntimeArgNo = 2; A.registerSimplificationCallback( IRPosition::callsite_argument(*KernelInitCB, InitUseStateMachineArgNo), StateMachineSimplifyCB); @@ -3301,14 +3300,6 @@ A.registerSimplificationCallback( IRPosition::callsite_argument(*KernelDeinitCB, DeinitModeArgNo), ModeSimplifyCB); - A.registerSimplificationCallback( - IRPosition::callsite_argument(*KernelInitCB, - InitRequiresFullRuntimeArgNo), - IsGenericModeSimplifyCB); - A.registerSimplificationCallback( - IRPosition::callsite_argument(*KernelDeinitCB, - DeinitRequiresFullRuntimeArgNo), - IsGenericModeSimplifyCB); // Check if we know we are in SPMD-mode already. ConstantInt *ModeArg = @@ -3702,8 +3693,6 @@ const int InitModeArgNo = 1; const int DeinitModeArgNo = 1; const int InitUseStateMachineArgNo = 2; - const int InitRequiresFullRuntimeArgNo = 3; - const int DeinitRequiresFullRuntimeArgNo = 2; auto &Ctx = getAnchorValue().getContext(); A.changeUseAfterManifest( @@ -3717,12 +3706,6 @@ KernelDeinitCB->getArgOperandUse(DeinitModeArgNo), *ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx), OMP_TGT_EXEC_MODE_SPMD)); - A.changeUseAfterManifest( - KernelInitCB->getArgOperandUse(InitRequiresFullRuntimeArgNo), - *ConstantInt::getBool(Ctx, false)); - A.changeUseAfterManifest( - KernelDeinitCB->getArgOperandUse(DeinitRequiresFullRuntimeArgNo), - *ConstantInt::getBool(Ctx, false)); ++NumOpenMPTargetRegionKernelsSPMD; Index: llvm/test/Transforms/OpenMP/always_inline_device.ll =================================================================== --- llvm/test/Transforms/OpenMP/always_inline_device.ll +++ llvm/test/Transforms/OpenMP/always_inline_device.ll @@ -13,7 +13,7 @@ ; CHECK: Function Attrs: convergent norecurse nounwind ; CHECK-LABEL: @__omp_offloading_fd02_c0934fc2_foo_l4( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 false) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false) ; CHECK-NEXT: [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() ; CHECK-NEXT: [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0 ; CHECK-NEXT: br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]] @@ -24,13 +24,13 @@ ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: ; CHECK-NEXT: store i8 1, i8* @G, align 1 -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) ; CHECK-NEXT: ret void ; CHECK: worker.exit: ; CHECK-NEXT: ret void ; entry: - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -42,7 +42,7 @@ %isSPMD = call i8 @__kmpc_is_spmd_exec_mode() store i8 %isSPMD, i8* @G call void @bar() #2 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) ret void worker.exit: ; preds = %entry @@ -51,9 +51,9 @@ declare i8 @__kmpc_is_spmd_exec_mode() -declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) +declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) -declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) +declare void @__kmpc_target_deinit(%struct.ident_t*, i8) ; Function Attrs: convergent nounwind define hidden void @bar() #1 { Index: llvm/test/Transforms/OpenMP/barrier_removal.ll =================================================================== --- llvm/test/Transforms/OpenMP/barrier_removal.ll +++ llvm/test/Transforms/OpenMP/barrier_removal.ll @@ -249,6 +249,7 @@ ; CHECK: attributes #[[ATTR0:[0-9]+]] = { "llvm.assume"="ompx_aligned_barrier" } ; CHECK: attributes #[[ATTR1:[0-9]+]] = { convergent nocallback nounwind } ; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } ;. ; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50} ; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} Index: llvm/test/Transforms/OpenMP/custom_state_machines.ll =================================================================== --- llvm/test/Transforms/OpenMP/custom_state_machines.ll +++ llvm/test/Transforms/OpenMP/custom_state_machines.ll @@ -141,7 +141,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -149,7 +149,7 @@ %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 call void @__omp_outlined__(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) ret void worker.exit: ; preds = %entry @@ -157,7 +157,7 @@ } ; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning. -define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) { +define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) { ret i32 0 } @@ -193,14 +193,14 @@ declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #3 -declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) +declare void @__kmpc_target_deinit(%struct.ident_t*, i8) define weak void @__omp_offloading_14_a36502b_simple_state_machine_l22() #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -208,7 +208,7 @@ %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 call void @__omp_outlined__1(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) ret void worker.exit: ; preds = %entry @@ -295,7 +295,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -303,7 +303,7 @@ %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 call void @__omp_outlined__4(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) ret void worker.exit: ; preds = %entry @@ -375,7 +375,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -383,7 +383,7 @@ %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 call void @__omp_outlined__6(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) ret void worker.exit: ; preds = %entry @@ -463,7 +463,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -471,7 +471,7 @@ %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 call void @__omp_outlined__9(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) ret void worker.exit: ; preds = %entry @@ -549,7 +549,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -557,7 +557,7 @@ %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 call void @__omp_outlined__12(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) ret void worker.exit: ; preds = %entry @@ -638,7 +638,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -646,7 +646,7 @@ %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 call void @__omp_outlined__15(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) ret void worker.exit: ; preds = %entry @@ -693,7 +693,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -701,7 +701,7 @@ %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 call void @__omp_outlined__16(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) ret void worker.exit: ; preds = %entry @@ -850,18 +850,23 @@ ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 false, i1 true) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 false) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] ; AMDGPU-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void ; ; +; AMDGPU-LABEL: define {{[^@]+}}@__kmpc_target_init +; AMDGPU-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { +; AMDGPU-NEXT: ret i32 0 +; +; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__ ; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { @@ -914,7 +919,7 @@ ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: is_worker_check: @@ -961,7 +966,7 @@ ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1040,7 +1045,7 @@ ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: is_worker_check: @@ -1093,7 +1098,7 @@ ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1192,7 +1197,7 @@ ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: is_worker_check: @@ -1241,7 +1246,7 @@ ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1319,7 +1324,7 @@ ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: is_worker_check: @@ -1366,7 +1371,7 @@ ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1444,7 +1449,7 @@ ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: is_worker_check: @@ -1491,7 +1496,7 @@ ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-NEXT: call void @__omp_outlined__12(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1569,7 +1574,7 @@ ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: is_worker_check: @@ -1611,7 +1616,7 @@ ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: call void @__omp_outlined__15(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1677,7 +1682,7 @@ ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: is_worker_check: @@ -1713,7 +1718,7 @@ ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: call void @__omp_outlined__16(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1833,18 +1838,23 @@ ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 false, i1 true) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 false) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] ; NVPTX-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void ; ; +; NVPTX-LABEL: define {{[^@]+}}@__kmpc_target_init +; NVPTX-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { +; NVPTX-NEXT: ret i32 0 +; +; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__ ; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { @@ -1897,7 +1907,7 @@ ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: is_worker_check: @@ -1943,7 +1953,7 @@ ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2022,7 +2032,7 @@ ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: is_worker_check: @@ -2074,7 +2084,7 @@ ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2173,7 +2183,7 @@ ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: is_worker_check: @@ -2221,7 +2231,7 @@ ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2299,7 +2309,7 @@ ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: is_worker_check: @@ -2345,7 +2355,7 @@ ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2423,7 +2433,7 @@ ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: is_worker_check: @@ -2469,7 +2479,7 @@ ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-NEXT: call void @__omp_outlined__12(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2547,7 +2557,7 @@ ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: is_worker_check: @@ -2588,7 +2598,7 @@ ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: call void @__omp_outlined__15(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2654,7 +2664,7 @@ ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: is_worker_check: @@ -2689,7 +2699,7 @@ ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: call void @__omp_outlined__16(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2809,18 +2819,23 @@ ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void ; ; +; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init +; AMDGPU-DISABLED-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { +; AMDGPU-DISABLED-NEXT: ret i32 0 +; +; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__ ; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { @@ -2872,14 +2887,14 @@ ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -2957,14 +2972,14 @@ ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -3062,14 +3077,14 @@ ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -3146,14 +3161,14 @@ ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -3230,14 +3245,14 @@ ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__12(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -3314,13 +3329,13 @@ ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__15(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -3385,13 +3400,13 @@ ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__16(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: ; AMDGPU-DISABLED-NEXT: ret void @@ -3511,18 +3526,23 @@ ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void ; ; +; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init +; NVPTX-DISABLED-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { +; NVPTX-DISABLED-NEXT: ret i32 0 +; +; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__ ; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { @@ -3574,14 +3594,14 @@ ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -3659,14 +3679,14 @@ ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -3764,14 +3784,14 @@ ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -3848,14 +3868,14 @@ ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -3932,14 +3952,14 @@ ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__12(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -4016,13 +4036,13 @@ ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__15(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void @@ -4087,13 +4107,13 @@ ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__16(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: ; NVPTX-DISABLED-NEXT: ret void Index: llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll =================================================================== --- llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll +++ llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll @@ -141,7 +141,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -149,7 +149,7 @@ %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 call void @__omp_outlined__(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) ret void worker.exit: ; preds = %entry @@ -157,7 +157,7 @@ } ; Make it a declaration so we will *not* apply custom state machine rewriting and wait for LTO. -declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1); +declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1); define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { entry: @@ -191,14 +191,14 @@ declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #3 -declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) +declare void @__kmpc_target_deinit(%struct.ident_t*, i8) define weak void @__omp_offloading_14_a36502b_simple_state_machine_l22() #0 { entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -206,7 +206,7 @@ %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 call void @__omp_outlined__1(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) ret void worker.exit: ; preds = %entry @@ -293,7 +293,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -301,7 +301,7 @@ %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 call void @__omp_outlined__4(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) ret void worker.exit: ; preds = %entry @@ -373,7 +373,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -381,7 +381,7 @@ %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 call void @__omp_outlined__6(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) ret void worker.exit: ; preds = %entry @@ -461,7 +461,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -469,7 +469,7 @@ %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 call void @__omp_outlined__9(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) ret void worker.exit: ; preds = %entry @@ -547,7 +547,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -555,7 +555,7 @@ %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 call void @__omp_outlined__12(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) ret void worker.exit: ; preds = %entry @@ -636,7 +636,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -644,7 +644,7 @@ %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 call void @__omp_outlined__15(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) ret void worker.exit: ; preds = %entry @@ -691,7 +691,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -699,7 +699,7 @@ %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 call void @__omp_outlined__16(i32* %.threadid_temp., i32* %.zero.addr) #3 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) ret void worker.exit: ; preds = %entry @@ -848,13 +848,13 @@ ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] ; AMDGPU-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -911,14 +911,14 @@ ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -996,14 +996,14 @@ ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1101,14 +1101,14 @@ ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1185,14 +1185,14 @@ ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1269,14 +1269,14 @@ ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; AMDGPU-NEXT: call void @__omp_outlined__12(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1353,13 +1353,13 @@ ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: call void @__omp_outlined__15(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1424,13 +1424,13 @@ ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: call void @__omp_outlined__16(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: ; AMDGPU-NEXT: ret void @@ -1550,13 +1550,13 @@ ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] ; NVPTX-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -1613,14 +1613,14 @@ ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -1698,14 +1698,14 @@ ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -1803,14 +1803,14 @@ ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -1887,14 +1887,14 @@ ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -1971,14 +1971,14 @@ ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 ; NVPTX-NEXT: call void @__omp_outlined__12(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2055,13 +2055,13 @@ ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: call void @__omp_outlined__15(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void @@ -2126,13 +2126,13 @@ ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: call void @__omp_outlined__16(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: ; NVPTX-NEXT: ret void Index: llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll =================================================================== --- llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll +++ llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll @@ -60,7 +60,7 @@ define weak void @__omp_offloading_2a_d80d3d_test_fallback_l11() local_unnamed_addr #0 !dbg !15 { entry: %captured_vars_addrs.i.i = alloca [0 x i8*], align 8 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true, i1 true) #3, !dbg !18 + %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true) #3, !dbg !18 %exec_user_code = icmp eq i32 %0, -1, !dbg !18 br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !18 @@ -77,12 +77,12 @@ call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %3, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !23 call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !26 call void @unknown() #6, !dbg !27 - call void @__kmpc_target_deinit(%struct.ident_t* nonnull @5, i8 1, i1 true) #3, !dbg !28 + call void @__kmpc_target_deinit(%struct.ident_t* nonnull @5, i8 1) #3, !dbg !28 br label %common.ret } ; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning. -define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) { +define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) { ret i32 0 } @@ -102,13 +102,13 @@ ; Function Attrs: nounwind declare i32 @__kmpc_global_thread_num(%struct.ident_t*) local_unnamed_addr #3 -declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) local_unnamed_addr +declare void @__kmpc_target_deinit(%struct.ident_t*, i8) local_unnamed_addr ; Function Attrs: norecurse nounwind define weak void @__omp_offloading_2a_d80d3d_test_no_fallback_l20() local_unnamed_addr #4 !dbg !32 { entry: %captured_vars_addrs.i2.i = alloca [0 x i8*], align 8 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @7, i8 1, i1 true, i1 true) #3, !dbg !33 + %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @7, i8 1, i1 true) #3, !dbg !33 %exec_user_code = icmp eq i32 %0, -1, !dbg !33 br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !33 @@ -133,7 +133,7 @@ call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !45 call void @no_openmp() call void @no_parallelism() - call void @__kmpc_target_deinit(%struct.ident_t* nonnull @11, i8 1, i1 true) #3, !dbg !46 + call void @__kmpc_target_deinit(%struct.ident_t* nonnull @11, i8 1) #3, !dbg !46 br label %common.ret } Index: llvm/test/Transforms/OpenMP/deduplication_target.ll =================================================================== --- llvm/test/Transforms/OpenMP/deduplication_target.ll +++ llvm/test/Transforms/OpenMP/deduplication_target.ll @@ -19,37 +19,37 @@ ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false) ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) ; CHECK-NEXT: ret void ; CHECK: worker.exit: ; CHECK-NEXT: ret void ; entry: %captured_vars_addrs = alloca [0 x i8*], align 8 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 2, i1 false, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 2, i1 false) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit user_code.entry: ; preds = %entry %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2) %2 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2) - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 2, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 2) ret void worker.exit: ; preds = %entry ret void } -declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) +declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #1 -declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) +declare void @__kmpc_target_deinit(%struct.ident_t*, i8) attributes #0 = { convergent noinline norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } attributes #1 = { nounwind } Index: llvm/test/Transforms/OpenMP/fold_generic_main_thread.ll =================================================================== --- llvm/test/Transforms/OpenMP/fold_generic_main_thread.ll +++ llvm/test/Transforms/OpenMP/fold_generic_main_thread.ll @@ -10,7 +10,7 @@ define void @kernel() { ; CHECK-LABEL: define {{[^@]+}}@kernel() { -; CHECK-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1:[0-9]+]], i8 1, i1 false, i1 false) +; CHECK-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1:[0-9]+]], i8 1, i1 false) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1 ; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: @@ -20,11 +20,11 @@ ; CHECK-NEXT: call void @bar() ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 true) +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1) ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@kernel() { -; CHECK-DISABLED-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1:[0-9]+]], i8 1, i1 false, i1 false) +; CHECK-DISABLED-NEXT: [[CALL:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1:[0-9]+]], i8 1, i1 false) ; CHECK-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1 ; CHECK-DISABLED-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK-DISABLED: if.then: @@ -34,10 +34,10 @@ ; CHECK-DISABLED-NEXT: call void @bar() ; CHECK-DISABLED-NEXT: br label [[IF_END]] ; CHECK-DISABLED: if.end: -; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 true) +; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1) ; CHECK-DISABLED-NEXT: ret void ; - %call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 false, i1 false) + %call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 false) %cmp = icmp eq i32 %call, -1 br i1 %cmp, label %if.then, label %if.else if.then: @@ -47,7 +47,7 @@ call void @bar() br label %if.end if.end: - call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1) ret void } @@ -135,9 +135,9 @@ declare i32 @__kmpc_get_hardware_thread_id() -declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) +declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) -declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) +declare void @__kmpc_target_deinit(%struct.ident_t*, i8) !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3, !4, !5, !6} Index: llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll =================================================================== --- llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll +++ llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll @@ -18,7 +18,7 @@ define weak void @kernel0() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel0 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false) ; CHECK-NEXT: [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() ; CHECK-NEXT: [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0 ; CHECK-NEXT: br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]] @@ -28,15 +28,15 @@ ; CHECK-NEXT: call void @helper0() #[[ATTR1:[0-9]+]] ; CHECK-NEXT: call void @helper1() #[[ATTR1]] ; CHECK-NEXT: call void @helper2() #[[ATTR1]] -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false) +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false) + %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false) call void @helper0() call void @helper1() call void @helper2() - call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false) + call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1) ret void } @@ -45,7 +45,7 @@ define weak void @kernel1() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel1 ; CHECK-SAME: () #[[ATTR0]] { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false) ; CHECK-NEXT: [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() ; CHECK-NEXT: [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0 ; CHECK-NEXT: br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]] @@ -53,13 +53,13 @@ ; CHECK-NEXT: ret void ; CHECK: main.thread.user_code: ; CHECK-NEXT: call void @helper1() #[[ATTR1]] -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false) +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false) + %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false) call void @helper1() - call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false) + call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1) ret void } @@ -70,7 +70,7 @@ ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false) ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[I]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; CHECK: common.ret: @@ -82,12 +82,12 @@ ; CHECK-NEXT: call void @helper1() #[[ATTR1]] ; CHECK-NEXT: call void @helper2() #[[ATTR1]] ; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* null, i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP1]], i64 0) -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false) +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2) ; CHECK-NEXT: ret void ; entry: %captured_vars_addrs = alloca [0 x i8*], align 8 - %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 true, i1 true) + %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 true) %exec_user_code = icmp eq i32 %i, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -101,7 +101,7 @@ call void @helper1() call void @helper2() call void @__kmpc_parallel_51(%struct.ident_t* null, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** %1, i64 0) - call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1) ret void } @@ -202,8 +202,8 @@ ret i32 %ret } declare i32 @__kmpc_get_hardware_num_threads_in_block_dummy() -declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1 zeroext, i1 zeroext) #1 -declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i8, i1 zeroext) #1 +declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1 zeroext) #1 +declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i8) #1 declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) declare i32 @__kmpc_global_thread_num(%struct.ident_t*) Index: llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll =================================================================== --- llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll +++ llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold_optnone.ll @@ -16,18 +16,18 @@ define weak void @kernel0() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel0 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false) ; CHECK-NEXT: call void @helper0() ; CHECK-NEXT: call void @helper1() ; CHECK-NEXT: call void @helper2() -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false) +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false) + %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false) call void @helper0() call void @helper1() call void @helper2() - call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false) + call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true) ret void } @@ -36,14 +36,14 @@ define weak void @kernel1() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel1 ; CHECK-SAME: () #[[ATTR0]] { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false) ; CHECK-NEXT: call void @helper1() -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false) +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false) + %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false) call void @helper1() - call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false) + call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false) ret void } @@ -52,18 +52,18 @@ define weak void @kernel2() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel2 ; CHECK-SAME: () #[[ATTR0]] { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false) ; CHECK-NEXT: call void @helper0() ; CHECK-NEXT: call void @helper1() ; CHECK-NEXT: call void @helper2() -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false) +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false, i1 false) + %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 false) call void @helper0() call void @helper1() call void @helper2() - call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 false) + call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false) ret void } @@ -112,8 +112,8 @@ } declare i32 @__kmpc_get_hardware_num_threads_in_block() -declare i32 @__kmpc_target_init(%struct.ident_t*, i1 zeroext, i1 zeroext, i1 zeroext) #1 -declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i1 zeroext, i1 zeroext) #1 +declare i32 @__kmpc_target_init(%struct.ident_t*, i1 zeroext, i1 zeroext) #1 +declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i1 zeroext) #1 !llvm.module.flags = !{!0, !1} Index: llvm/test/Transforms/OpenMP/global_constructor.ll =================================================================== --- llvm/test/Transforms/OpenMP/global_constructor.ll +++ llvm/test/Transforms/OpenMP/global_constructor.ll @@ -10,7 +10,7 @@ define weak void @__omp_offloading_fd02_85283c04_main_l11(double* nonnull align 8 dereferenceable(8) %X) local_unnamed_addr { entry: - %0 = tail call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 2, i1 false, i1 false) #0 + %0 = tail call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 2, i1 false) #0 %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -29,13 +29,13 @@ region.barrier: tail call void @__kmpc_barrier_simple_spmd(%struct.ident_t* nonnull @1, i32 %2) - tail call void @__kmpc_target_deinit(%struct.ident_t* nonnull @1, i8 2, i1 false) #0 + tail call void @__kmpc_target_deinit(%struct.ident_t* nonnull @1, i8 2) #0 br label %common.ret } -declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) local_unnamed_addr +declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) local_unnamed_addr -declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) local_unnamed_addr +declare void @__kmpc_target_deinit(%struct.ident_t*, i8) local_unnamed_addr define internal void @__omp_offloading__fd02_85283c04_Device_l6_ctor() { entry: @@ -78,21 +78,32 @@ ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd02_85283c04_main_l11 ; CHECK-SAME: (double* nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 false) #[[ATTR1:[0-9]+]] +; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1:[0-9]+]], i8 2, i1 false) #[[ATTR2:[0-9]+]] ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; CHECK: common.ret: ; CHECK-NEXT: ret void ; CHECK: user_code.entry: ; CHECK-NEXT: [[TMP1:%.*]] = load double, double* @_ZL6Device, align 8, !tbaa [[TBAA11:![0-9]+]] -; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #[[ATTR1]] +; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #[[ATTR2]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0 ; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]] ; CHECK: region.guarded: ; CHECK-NEXT: store double [[TMP1]], double* [[X]], align 8, !tbaa [[TBAA11]] ; CHECK-NEXT: br label [[REGION_BARRIER]] ; CHECK: region.barrier: -; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(%struct.ident_t* nonnull @[[GLOB1]], i32 [[TMP2]]) #[[ATTR1]] -; CHECK-NEXT: tail call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i8 2, i1 false) #[[ATTR1]] +; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(%struct.ident_t* nonnull @[[GLOB1]], i32 [[TMP2]]) #[[ATTR2]] +; CHECK-NEXT: tail call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i8 2) #[[ATTR2]] ; CHECK-NEXT: br label [[COMMON_RET]] ; +; +; CHECK: Function Attrs: norecurse +; CHECK-LABEL: define {{[^@]+}}@__omp_offloading__fd02_85283c04_Device_l6_ctor +; CHECK-SAME: () #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL_I:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR1:[0-9]+]] +; CHECK-NEXT: [[CALL_I2:%.*]] = tail call double @__nv_log(double noundef 2.000000e+00) #[[ATTR1]] +; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[CALL_I]], [[CALL_I2]] +; CHECK-NEXT: store double [[DIV]], double* @_ZL6Device, align 8, !tbaa [[TBAA11]] +; CHECK-NEXT: ret void +; Index: llvm/test/Transforms/OpenMP/globalization_remarks.ll =================================================================== --- llvm/test/Transforms/OpenMP/globalization_remarks.ll +++ llvm/test/Transforms/OpenMP/globalization_remarks.ll @@ -13,13 +13,13 @@ define void @foo() { entry: - %c = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 true, i1 true) + %c = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 true) %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !10 %x_on_stack = bitcast i8* %0 to i32* %1 = bitcast i32* %x_on_stack to i8* call void @share(i8* %1), !dbg !10 call void @__kmpc_free_shared(i8* %0) - call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false) ret void } @@ -33,9 +33,9 @@ declare void @__kmpc_free_shared(i8* nocapture) -declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1); +declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1); -declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1) +declare void @__kmpc_target_deinit(%struct.ident_t*, i1) !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3, !4, !5, !6} Index: llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll =================================================================== --- llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll +++ llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll @@ -48,7 +48,7 @@ %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 store i32 0, i32* %.zero.addr, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -56,14 +56,14 @@ %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) store i32 %1, i32* %.threadid_temp., align 4 call void @__omp_outlined__(i32* %.threadid_temp., i32* %.zero.addr) - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) ret void worker.exit: ; preds = %entry ret void } -define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) { +define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) { ret i32 0 } @@ -149,7 +149,7 @@ declare i32 @__kmpc_global_thread_num(%struct.ident_t*) -declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) +declare void @__kmpc_target_deinit(%struct.ident_t*, i8) define internal void @__omp_outlined__3(i32* noalias %.global_tid., i32* noalias %.bound_tid.) { entry: Index: llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll =================================================================== --- llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll +++ llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll @@ -21,18 +21,18 @@ ;. define weak void @is_spmd() { ; CHECK-LABEL: define {{[^@]+}}@is_spmd() { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false) ; CHECK-NEXT: call void @is_spmd_helper1() ; CHECK-NEXT: call void @is_spmd_helper2() ; CHECK-NEXT: call void @is_mixed_helper() -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false) +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false) + %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false) call void @is_spmd_helper1() call void @is_spmd_helper2() call void @is_mixed_helper() - call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false) + call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2) ret void } @@ -40,7 +40,7 @@ ; CHECK-LABEL: define {{[^@]+}}@will_be_spmd() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false) ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[I]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; CHECK: common.ret: @@ -50,12 +50,12 @@ ; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; CHECK-NEXT: call void @is_spmd_helper2() ; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* null, i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP1]], i64 0) -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false) +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2) ; CHECK-NEXT: ret void ; entry: %captured_vars_addrs = alloca [0 x i8*], align 8 - %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 true, i1 true) + %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 true) %exec_user_code = icmp eq i32 %i, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -67,41 +67,41 @@ %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8** call void @is_spmd_helper2() call void @__kmpc_parallel_51(%struct.ident_t* null, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** %1, i64 0) - call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1) ret void } define weak void @non_spmd() { ; CHECK-LABEL: define {{[^@]+}}@non_spmd() { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false) ; CHECK-NEXT: call void @is_generic_helper1() ; CHECK-NEXT: call void @is_generic_helper2() ; CHECK-NEXT: call void @is_mixed_helper() -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false) +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false) + %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false) call void @is_generic_helper1() call void @is_generic_helper2() call void @is_mixed_helper() - call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false) + call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1) ret void } define weak void @will_not_be_spmd() { ; CHECK-LABEL: define {{[^@]+}}@will_not_be_spmd() { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false) ; CHECK-NEXT: call void @is_generic_helper1() ; CHECK-NEXT: call void @is_generic_helper2() ; CHECK-NEXT: call void @is_mixed_helper() -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false) +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false) + %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false) call void @is_generic_helper1() call void @is_generic_helper2() call void @is_mixed_helper() - call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false) + call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1) ret void } @@ -198,8 +198,8 @@ declare void @spmd_compatible() "llvm.assume"="ompx_spmd_amenable" declare i8 @__kmpc_is_spmd_exec_mode() -declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1 zeroext, i1 zeroext) -declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i8, i1 zeroext) +declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1 zeroext) +declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i8) declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) declare i32 @__kmpc_global_thread_num(%struct.ident_t*) declare void @foo() Index: llvm/test/Transforms/OpenMP/parallel_level_fold.ll =================================================================== --- llvm/test/Transforms/OpenMP/parallel_level_fold.ll +++ llvm/test/Transforms/OpenMP/parallel_level_fold.ll @@ -19,46 +19,46 @@ ;. define weak void @none_spmd() { ; CHECK-LABEL: define {{[^@]+}}@none_spmd() { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false) ; CHECK-NEXT: call void @none_spmd_helper() ; CHECK-NEXT: call void @mixed_helper() -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false) +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false) + %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false) call void @none_spmd_helper() call void @mixed_helper() - call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false) + call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1) ret void } define weak void @spmd() { ; CHECK-LABEL: define {{[^@]+}}@spmd() { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false) ; CHECK-NEXT: call void @spmd_helper() ; CHECK-NEXT: call void @mixed_helper() -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false) +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false) + %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false) call void @spmd_helper() call void @mixed_helper() - call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false) + call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2) ret void } define weak void @parallel() { ; CHECK-LABEL: define {{[^@]+}}@parallel() { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false) ; CHECK-NEXT: call void @spmd_helper() ; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* null, i32 0, i32 0, i32 0, i32 0, i8* null, i8* null, i8** null, i64 0) -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false) +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false) + %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false) call void @spmd_helper() call void @__kmpc_parallel_51(%struct.ident_t* null, i32 0, i32 0, i32 0, i32 0, i8* null, i8* null, i8** null, i64 0) - call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false) + call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2) ret void } @@ -130,8 +130,8 @@ declare void @foo() declare void @bar() declare i8 @__kmpc_parallel_level() -declare i32 @__kmpc_target_init(%struct.ident_t*, i8 zeroext, i1 zeroext, i1 zeroext) #1 -declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i8 zeroext, i1 zeroext) #1 +declare i32 @__kmpc_target_init(%struct.ident_t*, i8 zeroext, i1 zeroext) #1 +declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i8 zeroext) #1 !llvm.module.flags = !{!0, !1} !nvvm.annotations = !{!2, !3, !4} Index: llvm/test/Transforms/OpenMP/remove_globalization.ll =================================================================== --- llvm/test/Transforms/OpenMP/remove_globalization.ll +++ llvm/test/Transforms/OpenMP/remove_globalization.ll @@ -17,44 +17,52 @@ %struct.ident_t = type { i32, i32, i32, i32, i8* } ; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning. -define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) { - ret i32 0 -} -declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) - ;. ; CHECK: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global i8* ;. ; CHECK-DISABLED: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global i8* ;. +define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) { +; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init +; CHECK-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { +; CHECK-NEXT: ret i32 0 +; +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init +; CHECK-DISABLED-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { +; CHECK-DISABLED-NEXT: ret i32 0 +; + ret i32 0 +} +declare void @__kmpc_target_deinit(%struct.ident_t*, i8) + define void @kernel() { ; CHECK-LABEL: define {{[^@]+}}@kernel() { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 false, i1 true) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 false) ; CHECK-NEXT: call void @foo() #[[ATTR0:[0-9]+]] ; CHECK-NEXT: call void @bar() #[[ATTR0]] ; CHECK-NEXT: call void @convert_and_move_alloca() #[[ATTR0]] ; CHECK-NEXT: call void @unknown_no_openmp() -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1, i1 true) +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1) ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@kernel() { ; CHECK-DISABLED-NEXT: entry: -; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 false, i1 true) +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 false) ; CHECK-DISABLED-NEXT: call void @foo() #[[ATTR0:[0-9]+]] ; CHECK-DISABLED-NEXT: call void @bar() #[[ATTR0]] ; CHECK-DISABLED-NEXT: call void @convert_and_move_alloca() #[[ATTR0]] ; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() -; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1, i1 true) +; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1) ; CHECK-DISABLED-NEXT: ret void ; entry: - %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 true) call void @foo() call void @bar() call void @convert_and_move_alloca() call void @unknown_no_openmp() - call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1) ret void } @@ -294,3 +302,5 @@ ; CHECK-DISABLED: [[META10:![0-9]+]] = !DISubroutineType(types: !2) ; CHECK-DISABLED: [[DBG11]] = !DILocation(line: 6, column: 2, scope: !9) ;. +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-REMARKS: {{.*}} Index: llvm/test/Transforms/OpenMP/replace_globalization.ll =================================================================== --- llvm/test/Transforms/OpenMP/replace_globalization.ll +++ llvm/test/Transforms/OpenMP/replace_globalization.ll @@ -25,19 +25,19 @@ define dso_local void @foo() "kernel" { entry: - %c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) + %c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) %x = call align 4 i8* @__kmpc_alloc_shared(i64 4) call void @unknown_no_openmp() %x_on_stack = bitcast i8* %x to i32* %0 = bitcast i32* %x_on_stack to i8* call void @use(i8* %0) call void @__kmpc_free_shared(i8* %x, i64 4) - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) ret void } define void @bar() "kernel" { - %c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) + %c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) call void @unknown_no_openmp() %cmp = icmp eq i32 %c, -1 br i1 %cmp, label %master1, label %exit @@ -60,12 +60,12 @@ call void @__kmpc_free_shared(i8* %y, i64 4) br label %exit exit: - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) ret void } define void @baz_spmd() "kernel" { - %c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 2, i1 true, i1 true) + %c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 2, i1 true) call void @unknown_no_openmp() %c0 = icmp eq i32 %c, -1 br i1 %c0, label %master3, label %exit @@ -77,7 +77,7 @@ call void @__kmpc_free_shared(i8* %z, i64 24) br label %exit exit: - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 2, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 2) ret void } @@ -106,11 +106,11 @@ declare i32 @llvm.nvvm.read.ptx.sreg.warpsize() ; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning. -define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) { +define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) { ret i32 0 } -declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) +declare void @__kmpc_target_deinit(%struct.ident_t*, i8) declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" @@ -147,18 +147,18 @@ ; CHECK-LABEL: define {{[^@]+}}@foo ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) +; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) ; CHECK-NEXT: [[X:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i64 4) #[[ATTR6:[0-9]+]] ; CHECK-NEXT: call void @unknown_no_openmp() ; CHECK-NEXT: call void @use.internalized(i8* nofree [[X]]) #[[ATTR6]] ; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[X]], i64 4) #[[ATTR6]] -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bar ; CHECK-SAME: () #[[ATTR0]] { -; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) +; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) ; CHECK-NEXT: call void @unknown_no_openmp() ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], -1 ; CHECK-NEXT: br i1 [[CMP]], label [[MASTER1:%.*]], label [[EXIT:%.*]] @@ -173,13 +173,13 @@ ; CHECK-NEXT: call void @use.internalized(i8* nofree addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @y_shared, i32 0, i32 0) to i8*)) #[[ATTR6]] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@baz_spmd ; CHECK-SAME: () #[[ATTR0]] { -; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 true, i1 true) +; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) ; CHECK-NEXT: call void @unknown_no_openmp() ; CHECK-NEXT: [[C0:%.*]] = icmp eq i32 [[C]], -1 ; CHECK-NEXT: br i1 [[C0]], label [[MASTER3:%.*]], label [[EXIT:%.*]] @@ -189,7 +189,7 @@ ; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[Z]], i64 24) #[[ATTR6]] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) ; CHECK-NEXT: ret void ; ; @@ -215,6 +215,11 @@ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([1024 x i8], [1024 x i8] addrspace(3)* @stack, i32 0, i32 0) to i8*), i32 [[L]] ; CHECK-NEXT: ret i8* [[GEP]] ; +; +; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init +; CHECK-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { +; CHECK-NEXT: ret i32 0 +; ;. ; CHECK: attributes #[[ATTR0]] = { "kernel" } ; CHECK: attributes #[[ATTR1]] = { nofree norecurse nounwind memory(write) } @@ -238,3 +243,6 @@ ; CHECK: [[META11:![0-9]+]] = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !12, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) ; CHECK: [[META12:![0-9]+]] = !DISubroutineType(types: !2) ;. +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-LIMIT: {{.*}} +; CHECK-REMARKS: {{.*}} Index: llvm/test/Transforms/OpenMP/single_threaded_execution.ll =================================================================== --- llvm/test/Transforms/OpenMP/single_threaded_execution.ll +++ llvm/test/Transforms/OpenMP/single_threaded_execution.ll @@ -15,7 +15,7 @@ ; CHECK-NOT: [openmp-opt] Basic block @kernel if.else is executed by a single thread. ; CHECK-NOT: [openmp-opt] Basic block @kernel if.end is executed by a single thread. define void @kernel() { - %call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 false, i1 false) + %call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 false) %cmp = icmp eq i32 %call, -1 br i1 %cmp, label %if.then, label %if.else if.then: @@ -23,7 +23,7 @@ if.else: br label %if.end if.end: - call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1) ret void } @@ -106,9 +106,9 @@ declare void @__kmpc_kernel_prepare_parallel(i8*) -declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) +declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) -declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) +declare void @__kmpc_target_deinit(%struct.ident_t*, i8) attributes #0 = { cold noinline } Index: llvm/test/Transforms/OpenMP/spmdization.ll =================================================================== --- llvm/test/Transforms/OpenMP/spmdization.ll +++ llvm/test/Transforms/OpenMP/spmdization.ll @@ -197,7 +197,7 @@ ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; AMDGPU: common.ret: @@ -206,7 +206,7 @@ ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4:[0-9]+]] ; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] ; AMDGPU-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) ; AMDGPU-NEXT: br label [[COMMON_RET]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug @@ -214,7 +214,7 @@ ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; NVPTX: common.ret: @@ -223,7 +223,7 @@ ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4:[0-9]+]] ; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] ; NVPTX-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) ; NVPTX-NEXT: br label [[COMMON_RET]] ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug @@ -232,7 +232,7 @@ ; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU-DISABLED: is_worker_check: @@ -277,7 +277,7 @@ ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4:[0-9]+]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug @@ -286,7 +286,7 @@ ; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) ; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX-DISABLED: is_worker_check: @@ -330,13 +330,13 @@ ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4:[0-9]+]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] ; entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -348,7 +348,7 @@ store i32 0, i32* %.zero.addr, align 4 store i32 %1, i32* %.threadid_temp., align 4, !tbaa !18 call void @__omp_outlined__(i32* %.threadid_temp., i32* %.zero.addr) #6 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) br label %common.ret } @@ -542,7 +542,7 @@ ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; AMDGPU: common.ret: @@ -551,7 +551,7 @@ ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] ; AMDGPU-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) ; AMDGPU-NEXT: br label [[COMMON_RET]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 @@ -559,7 +559,7 @@ ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; NVPTX: common.ret: @@ -568,7 +568,7 @@ ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] ; NVPTX-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) ; NVPTX-NEXT: br label [[COMMON_RET]] ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 @@ -577,7 +577,7 @@ ; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU-DISABLED: is_worker_check: @@ -622,7 +622,7 @@ ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20 @@ -631,7 +631,7 @@ ; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) ; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX-DISABLED: is_worker_check: @@ -675,13 +675,13 @@ ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] ; entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -693,7 +693,7 @@ store i32 0, i32* %.zero.addr, align 4 store i32 %1, i32* %.threadid_temp., align 4, !tbaa !18 call void @__omp_outlined__2(i32* %.threadid_temp., i32* %.zero.addr) #6 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) br label %common.ret } @@ -905,7 +905,7 @@ ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; AMDGPU: common.ret: @@ -914,7 +914,7 @@ ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] ; AMDGPU-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) ; AMDGPU-NEXT: br label [[COMMON_RET]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 @@ -922,7 +922,7 @@ ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; NVPTX: common.ret: @@ -931,7 +931,7 @@ ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] ; NVPTX-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) ; NVPTX-NEXT: br label [[COMMON_RET]] ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 @@ -940,7 +940,7 @@ ; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU-DISABLED: is_worker_check: @@ -985,7 +985,7 @@ ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35 @@ -994,7 +994,7 @@ ; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) ; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX-DISABLED: is_worker_check: @@ -1038,13 +1038,13 @@ ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] ; entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -1056,7 +1056,7 @@ store i32 0, i32* %.zero.addr, align 4 store i32 %1, i32* %.threadid_temp., align 4, !tbaa !18 call void @__omp_outlined__4(i32* %.threadid_temp., i32* %.zero.addr) #6 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) br label %common.ret } @@ -1293,7 +1293,7 @@ ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; AMDGPU: common.ret: @@ -1302,7 +1302,7 @@ ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] ; AMDGPU-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) ; AMDGPU-NEXT: br label [[COMMON_RET]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50 @@ -1310,7 +1310,7 @@ ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; NVPTX: common.ret: @@ -1319,7 +1319,7 @@ ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] ; NVPTX-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) ; NVPTX-NEXT: br label [[COMMON_RET]] ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50 @@ -1328,7 +1328,7 @@ ; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU-DISABLED: is_worker_check: @@ -1373,7 +1373,7 @@ ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50 @@ -1382,7 +1382,7 @@ ; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) ; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX-DISABLED: is_worker_check: @@ -1426,13 +1426,13 @@ ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] ; entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -1444,7 +1444,7 @@ store i32 0, i32* %.zero.addr, align 4 store i32 %1, i32* %.threadid_temp., align 4, !tbaa !18 call void @__omp_outlined__6(i32* %.threadid_temp., i32* %.zero.addr) #6 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) br label %common.ret } @@ -1717,7 +1717,7 @@ ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: is_worker_check: @@ -1755,7 +1755,7 @@ ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; AMDGPU-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; AMDGPU-NEXT: br label [[COMMON_RET]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 @@ -1764,7 +1764,7 @@ ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: is_worker_check: @@ -1801,7 +1801,7 @@ ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; NVPTX-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; NVPTX-NEXT: br label [[COMMON_RET]] ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 @@ -1810,7 +1810,7 @@ ; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU-DISABLED: is_worker_check: @@ -1848,7 +1848,7 @@ ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65 @@ -1857,7 +1857,7 @@ ; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) ; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX-DISABLED: is_worker_check: @@ -1894,13 +1894,13 @@ ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] ; NVPTX-DISABLED-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4]] -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] ; entry: %.zero.addr = alloca i32, align 4 %.threadid_temp. = alloca i32, align 4 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -1912,7 +1912,7 @@ store i32 0, i32* %.zero.addr, align 4 store i32 %1, i32* %.threadid_temp., align 4, !tbaa !18 call void @__omp_outlined__8(i32* %.threadid_temp., i32* %.zero.addr) #6 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) br label %common.ret } @@ -1954,7 +1954,7 @@ ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: is_worker_check: @@ -2002,7 +2002,7 @@ ; AMDGPU-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) #[[ATTR4]] ; AMDGPU-NEXT: [[TMP5:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* @__omp_outlined__9_wrapper.ID, i8** [[TMP5]], i64 0) -; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; AMDGPU-NEXT: br label [[COMMON_RET]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 @@ -2010,7 +2010,7 @@ ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: is_worker_check: @@ -2057,7 +2057,7 @@ ; NVPTX-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) #[[ATTR4]] ; NVPTX-NEXT: [[TMP5:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* @__omp_outlined__9_wrapper.ID, i8** [[TMP5]], i64 0) -; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; NVPTX-NEXT: br label [[COMMON_RET]] ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 @@ -2065,7 +2065,7 @@ ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8, addrspace(5) ; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU-DISABLED: is_worker_check: @@ -2113,7 +2113,7 @@ ; AMDGPU-DISABLED-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) #[[ATTR4]] ; AMDGPU-DISABLED-NEXT: [[TMP5:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* @__omp_outlined__9_wrapper.ID, i8** [[TMP5]], i64 0) -; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 @@ -2121,7 +2121,7 @@ ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 ; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) ; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX-DISABLED: is_worker_check: @@ -2168,12 +2168,12 @@ ; NVPTX-DISABLED-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) #[[ATTR4]] ; NVPTX-DISABLED-NEXT: [[TMP5:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* @__omp_outlined__9_wrapper.ID, i8** [[TMP5]], i64 0) -; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] ; entry: %captured_vars_addrs = alloca [0 x i8*], align 8 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -2187,7 +2187,7 @@ %4 = call i32 @__kmpc_omp_task(%struct.ident_t* @1, i32 %1, i8* %2) %5 = bitcast [0 x i8*]* %captured_vars_addrs to i8** call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__9_wrapper to i8*), i8** %5, i64 0) - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) br label %common.ret } @@ -2258,7 +2258,23 @@ declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 ; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning. -define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) { +define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) { +; AMDGPU-LABEL: define {{[^@]+}}@__kmpc_target_init +; AMDGPU-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { +; AMDGPU-NEXT: ret i32 0 +; +; NVPTX-LABEL: define {{[^@]+}}@__kmpc_target_init +; NVPTX-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { +; NVPTX-NEXT: ret i32 0 +; +; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init +; AMDGPU-DISABLED-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { +; AMDGPU-DISABLED-NEXT: ret i32 0 +; +; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init +; NVPTX-DISABLED-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { +; NVPTX-DISABLED-NEXT: ret i32 0 +; ret i32 0 } @@ -2276,7 +2292,7 @@ ; Function Attrs: nounwind declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #6 -declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) +declare void @__kmpc_target_deinit(%struct.ident_t*, i8) ; Function Attrs: alwaysinline convergent norecurse nounwind Index: llvm/test/Transforms/OpenMP/spmdization_assumes.ll =================================================================== --- llvm/test/Transforms/OpenMP/spmdization_assumes.ll +++ llvm/test/Transforms/OpenMP/spmdization_assumes.ll @@ -32,7 +32,7 @@ ; CHECK-SAME: (double* nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1]], i8 2, i1 false, i1 false) #[[ATTR3:[0-9]+]] +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1]], i8 2, i1 false) #[[ATTR3:[0-9]+]] ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] ; CHECK: common.ret: @@ -56,12 +56,12 @@ ; CHECK: region.exit: ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [0 x i8*], [0 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 ; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** nonnull [[TMP4]], i64 0) #[[ATTR3]] -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i8 2, i1 false) #[[ATTR3]] +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i8 2) #[[ATTR3]] ; CHECK-NEXT: br label [[COMMON_RET]] ; entry: %captured_vars_addrs = alloca [0 x i8*], align 8 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true, i1 true) #3 + %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true) #3 %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -74,11 +74,11 @@ store double %call.i, double* %x, align 8, !tbaa !8 %2 = getelementptr inbounds [0 x i8*], [0 x i8*]* %captured_vars_addrs, i64 0, i64 0 call void @__kmpc_parallel_51(%struct.ident_t* nonnull @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** nonnull %2, i64 0) #3 - call void @__kmpc_target_deinit(%struct.ident_t* nonnull @1, i8 1, i1 true) #3 + call void @__kmpc_target_deinit(%struct.ident_t* nonnull @1, i8 1) #3 br label %common.ret } -declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) local_unnamed_addr +declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) local_unnamed_addr ; Function Attrs: alwaysinline mustprogress nofree norecurse nosync nounwind readnone willreturn define internal void @__omp_outlined__(i32* noalias nocapture %.global_tid., i32* noalias nocapture %.bound_tid.) #1 { @@ -114,7 +114,7 @@ ; Function Attrs: alwaysinline declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) local_unnamed_addr #4 -declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) local_unnamed_addr +declare void @__kmpc_target_deinit(%struct.ident_t*, i8) local_unnamed_addr ; Function Attrs: convergent declare double @__nv_sin(double) local_unnamed_addr #5 Index: llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll =================================================================== --- llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll +++ llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll @@ -3,7 +3,7 @@ ; Verify we change it to SPMD mode but also avoid propagating the old mode (=generic) into the __kmpc_target_init function. ; ; CHECK-NOT: store i32 0, ptr addrspace(3) @IsSPMDMode -; CHECK: call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i8 2, i1 false, i1 false) +; CHECK: call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i8 2, i1 false) ; CHECK-NOT: store i32 0, ptr addrspace(3) @IsSPMDMode ; CHECK: store i32 1, ptr addrspace(3) @IsSPMDMode ; CHECK-NOT: store i32 0, ptr addrspace(3) @IsSPMDMode @@ -41,7 +41,7 @@ entry: %ng1 = alloca i32, align 4 %captured_vars_addrs = alloca [2 x ptr], align 8, addrspace(5) - %0 = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i8 1, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %common.ret @@ -49,7 +49,7 @@ %captured_vars_addrs.ascast = addrspacecast ptr addrspace(5) %captured_vars_addrs to ptr store ptr %ng1, ptr addrspace(5) %captured_vars_addrs, align 8, !tbaa !7 call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i32 0, i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__, ptr nonnull @__omp_outlined___wrapper, ptr nonnull %captured_vars_addrs.ascast, i64 2) - call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i8 1, i1 true) + call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @1 to ptr), i8 1) br label %common.ret common.ret: ; preds = %user_code.entry, %entry @@ -104,8 +104,9 @@ } ; Function Attrs: convergent nounwind -define internal i32 @__kmpc_target_init(ptr nocapture noundef readnone %Ident, i8 noundef signext %Mode, i1 noundef zeroext %UseGenericStateMachine, i1 noundef zeroext %0) local_unnamed_addr #9 { +define internal i32 @__kmpc_target_init(ptr nocapture noundef readnone %Ident, i8 noundef signext %Mode, i1 noundef zeroext %UseGenericStateMachine) local_unnamed_addr #9 { entry: + %0 = and i32 undef, undef %1 = and i8 %Mode, 2 %tobool.not = icmp eq i8 %1, 0 br i1 %tobool.not, label %if.else, label %if.then @@ -260,7 +261,7 @@ } ; Function Attrs: nounwind -define internal void @__kmpc_target_deinit(ptr nocapture noundef readnone %Ident, i8 noundef signext %Mode, i1 noundef zeroext %0) local_unnamed_addr #10 { +define internal void @__kmpc_target_deinit(ptr nocapture noundef readnone %Ident, i8 noundef signext %Mode) local_unnamed_addr #10 { ret void } Index: llvm/test/Transforms/OpenMP/spmdization_guarding.ll =================================================================== --- llvm/test/Transforms/OpenMP/spmdization_guarding.ll +++ llvm/test/Transforms/OpenMP/spmdization_guarding.ll @@ -61,7 +61,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[HEAP2STACK_H2S:%.*]] = alloca i8, i64 8, align 8 ; CHECK-NEXT: [[N_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[N]] to i32 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1]], i8 2, i1 false, i1 false) #[[ATTR6:[0-9]+]] +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1]], i8 2, i1 false) #[[ATTR6:[0-9]+]] ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: @@ -169,7 +169,7 @@ ; CHECK-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR10]], !noalias !8 ; CHECK-NEXT: [[CALL15_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR10]], !noalias !8 ; CHECK-NEXT: [[CALL16_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR10]], !noalias !8 -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i8 2, i1 false) #[[ATTR6]] +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i8 2) #[[ATTR6]] ; CHECK-NEXT: ret void ; CHECK: worker.exit: ; CHECK-NEXT: ret void @@ -180,7 +180,7 @@ ; CHECK-DISABLED-NEXT: [[HEAP2STACK_H2S:%.*]] = alloca i8, i64 8, align 8 ; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 ; CHECK-DISABLED-NEXT: [[N_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[N]] to i32 -; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1]], i8 1, i1 false, i1 true) #[[ATTR6:[0-9]+]] +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1]], i8 1, i1 false) #[[ATTR6:[0-9]+]] ; CHECK-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; CHECK-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; CHECK-DISABLED: is_worker_check: @@ -256,14 +256,14 @@ ; CHECK-DISABLED-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR10]], !noalias !8 ; CHECK-DISABLED-NEXT: [[CALL15_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR10]], !noalias !8 ; CHECK-DISABLED-NEXT: [[CALL16_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR10]], !noalias !8 -; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i8 1, i1 true) #[[ATTR6]] +; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i8 1) #[[ATTR6]] ; CHECK-DISABLED-NEXT: ret void ; CHECK-DISABLED: worker.exit: ; CHECK-DISABLED-NEXT: ret void ; entry: %N.addr.sroa.0.0.extract.trunc = trunc i64 %N to i32 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true, i1 true) #3 + %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true) #3 %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit @@ -312,7 +312,7 @@ %call14.i = call i32 @no_openmp(i32* nonnull %x) #5, !noalias !8 %call15.i = call i32 @no_openmp(i32* nonnull %x) #5, !noalias !8 %call16.i = call i32 @no_openmp(i32* nonnull %x) #5, !noalias !8 - call void @__kmpc_target_deinit(%struct.ident_t* nonnull @1, i8 1, i1 true) #3 + call void @__kmpc_target_deinit(%struct.ident_t* nonnull @1, i8 1) #3 ret void worker.exit: ; preds = %entry @@ -345,7 +345,15 @@ declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) ; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning. -define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) { +define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) { +; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init +; CHECK-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { +; CHECK-NEXT: ret i32 0 +; +; CHECK-DISABLED-LABEL: define {{[^@]+}}@__kmpc_target_init +; CHECK-DISABLED-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { +; CHECK-DISABLED-NEXT: ret i32 0 +; ret i32 0 } @@ -362,7 +370,7 @@ ; Function Attrs: nounwind declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #3 -declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) +declare void @__kmpc_target_deinit(%struct.ident_t*, i8) ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn declare void @llvm.experimental.noalias.scope.decl(metadata) #4 Index: llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll =================================================================== --- llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll +++ llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll @@ -62,12 +62,12 @@ ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_spmd_l12 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: ; CHECK-NEXT: call void @spmd_helper() #[[ATTR5:[0-9]+]] -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) ; CHECK-NEXT: ret void ; CHECK: worker.exit: ; CHECK-NEXT: ret void @@ -76,7 +76,7 @@ ; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-DISABLE-SPMDIZATION-NEXT: entry: ; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8 -; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) +; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) ; CHECK-DISABLE-SPMDIZATION-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; CHECK-DISABLE-SPMDIZATION: is_worker_check: @@ -114,19 +114,19 @@ ; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK-DISABLE-SPMDIZATION: user_code.entry: ; CHECK-DISABLE-SPMDIZATION-NEXT: call void @spmd_helper() #[[ATTR5:[0-9]+]] -; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; CHECK-DISABLE-SPMDIZATION-NEXT: ret void ; CHECK-DISABLE-SPMDIZATION: worker.exit: ; CHECK-DISABLE-SPMDIZATION-NEXT: ret void ; entry: - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit user_code.entry: ; preds = %entry call void @spmd_helper() #5 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) ret void worker.exit: ; preds = %entry @@ -134,18 +134,26 @@ } ; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning. -define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) { +define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) { +; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init +; CHECK-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { +; CHECK-NEXT: ret i32 0 +; +; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__kmpc_target_init +; CHECK-DISABLE-SPMDIZATION-SAME: (%struct.ident_t* [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[TMP2:%.*]]) { +; CHECK-DISABLE-SPMDIZATION-NEXT: ret i32 0 +; ret i32 0 } -declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) +declare void @__kmpc_target_deinit(%struct.ident_t*, i8) ; Function Attrs: convergent noinline norecurse nounwind define weak void @__omp_offloading_2b_10393b5_generic_l20() #0 { ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_generic_l20 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) ; CHECK-NEXT: [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() ; CHECK-NEXT: [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0 ; CHECK-NEXT: br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]] @@ -156,7 +164,7 @@ ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: ; CHECK-NEXT: call void @generic_helper() #[[ATTR5]] -; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) ; CHECK-NEXT: ret void ; CHECK: worker.exit: ; CHECK-NEXT: ret void @@ -164,24 +172,24 @@ ; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_generic_l20 ; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR0]] { ; CHECK-DISABLE-SPMDIZATION-NEXT: entry: -; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) +; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false) ; CHECK-DISABLE-SPMDIZATION-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK-DISABLE-SPMDIZATION: user_code.entry: ; CHECK-DISABLE-SPMDIZATION-NEXT: call void @generic_helper() #[[ATTR5]] -; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) +; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) ; CHECK-DISABLE-SPMDIZATION-NEXT: ret void ; CHECK-DISABLE-SPMDIZATION: worker.exit: ; CHECK-DISABLE-SPMDIZATION-NEXT: ret void ; entry: - %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true, i1 true) + %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i8 1, i1 true) %exec_user_code = icmp eq i32 %0, -1 br i1 %exec_user_code, label %user_code.entry, label %worker.exit user_code.entry: ; preds = %entry call void @generic_helper() #5 - call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) + call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1) ret void worker.exit: ; preds = %entry Index: llvm/test/Transforms/OpenMP/spmdization_remarks.ll =================================================================== --- llvm/test/Transforms/OpenMP/spmdization_remarks.ll +++ llvm/test/Transforms/OpenMP/spmdization_remarks.ll @@ -62,7 +62,7 @@ define weak void @__omp_offloading_2a_d80d3d_test_fallback_l11() local_unnamed_addr #0 !dbg !15 { entry: %captured_vars_addrs.i.i = alloca [0 x i8*], align 8 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true, i1 true) #3, !dbg !18 + %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true) #3, !dbg !18 %exec_user_code = icmp eq i32 %0, -1, !dbg !18 br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !18 @@ -79,11 +79,11 @@ call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %3, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !23 call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !26 call void @unknown() #6, !dbg !27 - call void @__kmpc_target_deinit(%struct.ident_t* nonnull @5, i8 1, i1 true) #3, !dbg !28 + call void @__kmpc_target_deinit(%struct.ident_t* nonnull @5, i8 1) #3, !dbg !28 br label %common.ret } -define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) { +define weak i32 @__kmpc_target_init(%struct.ident_t*, i8, i1) { ret i32 0 } @@ -104,13 +104,13 @@ ; Function Attrs: nounwind declare i32 @__kmpc_global_thread_num(%struct.ident_t*) local_unnamed_addr #3 -declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) local_unnamed_addr +declare void @__kmpc_target_deinit(%struct.ident_t*, i8) local_unnamed_addr ; Function Attrs: norecurse nounwind define weak void @__omp_offloading_2a_d80d3d_test_no_fallback_l20() local_unnamed_addr #4 !dbg !32 { entry: %captured_vars_addrs.i2.i = alloca [0 x i8*], align 8 - %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @7, i8 1, i1 true, i1 true) #3, !dbg !33 + %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @7, i8 1, i1 true) #3, !dbg !33 %exec_user_code = icmp eq i32 %0, -1, !dbg !33 br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !33 @@ -134,7 +134,7 @@ call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %6, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !43 call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !45 call void @spmd_amenable() - call void @__kmpc_target_deinit(%struct.ident_t* nonnull @11, i8 1, i1 true) #3, !dbg !46 + call void @__kmpc_target_deinit(%struct.ident_t* nonnull @11, i8 1) #3, !dbg !46 br label %common.ret } Index: openmp/libomptarget/DeviceRTL/include/Interface.h =================================================================== --- openmp/libomptarget/DeviceRTL/include/Interface.h +++ openmp/libomptarget/DeviceRTL/include/Interface.h @@ -217,9 +217,9 @@ int8_t __kmpc_is_spmd_exec_mode(); int32_t __kmpc_target_init(IdentTy *Ident, int8_t Mode, - bool UseGenericStateMachine, bool); + bool UseGenericStateMachine); -void __kmpc_target_deinit(IdentTy *Ident, int8_t Mode, bool); +void __kmpc_target_deinit(IdentTy *Ident, int8_t Mode); ///} Index: openmp/libomptarget/DeviceRTL/src/Kernel.cpp =================================================================== --- openmp/libomptarget/DeviceRTL/src/Kernel.cpp +++ openmp/libomptarget/DeviceRTL/src/Kernel.cpp @@ -66,7 +66,7 @@ /// \param Ident Source location identification, can be NULL. /// int32_t __kmpc_target_init(IdentTy *Ident, int8_t Mode, - bool UseGenericStateMachine, bool) { + bool UseGenericStateMachine) { FunctionTracingRAII(); const bool IsSPMD = Mode & OMP_TGT_EXEC_MODE_SPMD; if (IsSPMD) { @@ -125,7 +125,7 @@ /// /// \param Ident Source location identification, can be NULL. /// -void __kmpc_target_deinit(IdentTy *Ident, int8_t Mode, bool) { +void __kmpc_target_deinit(IdentTy *Ident, int8_t Mode) { FunctionTracingRAII(); const bool IsSPMD = Mode & OMP_TGT_EXEC_MODE_SPMD; state::assumeInitialState(IsSPMD);