diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -795,9 +795,14 @@ llvm::Type *getKmpc_MicroPointerTy(); /// Returns __kmpc_for_static_init_* runtime function for the specified - /// size \a IVSize and sign \a IVSigned. + /// size \a IVSize and sign \a IVSigned. Will create a distribute call + /// __kmpc_distribute_static_init* if \a IsGPUDistribute is set. llvm::FunctionCallee createForStaticInitFunction(unsigned IVSize, - bool IVSigned); + bool IVSigned, + bool IsGPUDistribute); + + llvm::FunctionCallee createDistributeStaticInitFunction(unsigned IVSize, + bool IVSigned); /// Returns __kmpc_dispatch_init_* runtime function for the specified /// size \a IVSize and sign \a IVSigned. diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1560,13 +1560,22 @@ } llvm::FunctionCallee -CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { +CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned, + bool IsGPUDistribute) { assert((IVSize == 32 || IVSize == 64) && "IV size is not compatible with the omp runtime"); - StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" - : "__kmpc_for_static_init_4u") - : (IVSigned ? "__kmpc_for_static_init_8" - : "__kmpc_for_static_init_8u"); + StringRef Name; + if (IsGPUDistribute) + Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4" + : "__kmpc_distribute_static_init_4u") + : (IVSigned ? "__kmpc_distribute_static_init_8" + : "__kmpc_distribute_static_init_8u"); + else + Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" + : "__kmpc_for_static_init_4u") + : (IVSigned ? "__kmpc_for_static_init_8" + : "__kmpc_for_static_init_8u"); + llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; auto *PtrTy = llvm::PointerType::getUnqual(ITy); llvm::Type *TypeParams[] = { @@ -2826,7 +2835,7 @@ : OMP_IDENT_WORK_SECTIONS); llvm::Value *ThreadId = getThreadID(CGF, Loc); llvm::FunctionCallee StaticInitFunction = - createForStaticInitFunction(Values.IVSize, Values.IVSigned); + createForStaticInitFunction(Values.IVSize, Values.IVSigned, false); auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); @@ -2841,8 +2850,13 @@ llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); llvm::Value *ThreadId = getThreadID(CGF, Loc); - llvm::FunctionCallee StaticInitFunction = - createForStaticInitFunction(Values.IVSize, Values.IVSigned); + llvm::FunctionCallee StaticInitFunction; + bool isGPUDistribute = + CGM.getLangOpts().OpenMPIsDevice && + (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()); + StaticInitFunction = createForStaticInitFunction( + Values.IVSize, Values.IVSigned, isGPUDistribute); + emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, OMPC_SCHEDULE_MODIFIER_unknown, Values); @@ -2863,9 +2877,16 @@ : OMP_IDENT_WORK_SECTIONS), getThreadID(CGF, Loc)}; auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); - CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_for_static_fini), - Args); + if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice && + (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX())) + CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_distribute_static_fini), + Args); + else + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_for_static_fini), + Args); } void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, diff --git a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp --- a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp +++ b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp @@ -3005,7 +3005,7 @@ // CHECK4-NEXT: store i32 [[TMP6]], i32* [[CONV1]], align 4 // CHECK4-NEXT: [[TMP7:%.*]] = load i64, i64* [[ARGC_CASTED]], align 8 // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i64 [[TMP7]], [10 x i32]* [[TMP3]]) #[[ATTR1:[0-9]+]] +// CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i64 [[TMP7]], [10 x i32]* [[TMP3]]) #[[ATTR5:[0-9]+]] // CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK4-NEXT: ret void // CHECK4: worker.exit: @@ -3071,7 +3071,7 @@ // CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK4-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK4-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK4-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] @@ -3162,7 +3162,7 @@ // CHECK4: omp.loop.exit: // CHECK4-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP54]]) +// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP54]]) // CHECK4-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK4-NEXT: [[TMP56:%.*]] = icmp ne i32 [[TMP55]], 0 // CHECK4-NEXT: br i1 [[TMP56]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] @@ -3259,23 +3259,23 @@ // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 -// CHECK4-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I6]]) #[[ATTR5:[0-9]+]] -// CHECK4-NEXT: [[CALL9:%.*]] = call i32 @_Z3fooPi(i32* [[TMP1]]) #[[ATTR5]] +// CHECK4-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I6]]) #[[ATTR7:[0-9]+]] +// CHECK4-NEXT: [[CALL9:%.*]] = call i32 @_Z3fooPi(i32* [[TMP1]]) #[[ATTR7]] // CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[CALL]], [[CALL9]] // CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4 // CHECK4-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B4]], i64 0, i64 [[IDXPROM]] -// CHECK4-NEXT: [[CALL11:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX]]) #[[ATTR5]] +// CHECK4-NEXT: [[CALL11:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX]]) #[[ATTR7]] // CHECK4-NEXT: [[ADD12:%.*]] = add nsw i32 [[ADD10]], [[CALL11]] // CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4 // CHECK4-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP20]] to i64 // CHECK4-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C5]], i64 0, i64 [[IDXPROM13]] -// CHECK4-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX14]]) #[[ATTR5]] +// CHECK4-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX14]]) #[[ATTR7]] // CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD12]], [[CALL15]] // CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[I6]], align 4 // CHECK4-NEXT: [[IDXPROM17:%.*]] = sext i32 [[TMP21]] to i64 // CHECK4-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i64 0, i64 [[IDXPROM17]] -// CHECK4-NEXT: [[CALL19:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX18]]) #[[ATTR5]] +// CHECK4-NEXT: [[CALL19:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX18]]) #[[ATTR7]] // CHECK4-NEXT: [[ADD20:%.*]] = add nsw i32 [[ADD16]], [[CALL19]] // CHECK4-NEXT: store i32 [[ADD20]], i32* [[TMP1]], align 4 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] @@ -3292,7 +3292,7 @@ // CHECK4: omp.loop.exit: // CHECK4-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]]) +// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]]) // CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK4-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK4-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] @@ -3337,7 +3337,7 @@ // CHECK5-NEXT: store i32 [[TMP6]], i32* [[ARGC_CASTED]], align 4 // CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARGC_CASTED]], align 4 // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK5-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i32 [[TMP7]], [10 x i32]* [[TMP3]]) #[[ATTR1:[0-9]+]] +// CHECK5-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i32 [[TMP7]], [10 x i32]* [[TMP3]]) #[[ATTR5:[0-9]+]] // CHECK5-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK5-NEXT: ret void // CHECK5: worker.exit: @@ -3402,7 +3402,7 @@ // CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK5-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK5-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK5-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] @@ -3491,7 +3491,7 @@ // CHECK5: omp.loop.exit: // CHECK5-NEXT: [[TMP51:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: [[TMP52:%.*]] = load i32, i32* [[TMP51]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP52]]) +// CHECK5-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP52]]) // CHECK5-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK5-NEXT: [[TMP54:%.*]] = icmp ne i32 [[TMP53]], 0 // CHECK5-NEXT: br i1 [[TMP54]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] @@ -3585,20 +3585,20 @@ // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK5-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I5]]) #[[ATTR5:[0-9]+]] -// CHECK5-NEXT: [[CALL7:%.*]] = call i32 @_Z3fooPi(i32* [[TMP1]]) #[[ATTR5]] +// CHECK5-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I5]]) #[[ATTR7:[0-9]+]] +// CHECK5-NEXT: [[CALL7:%.*]] = call i32 @_Z3fooPi(i32* [[TMP1]]) #[[ATTR7]] // CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]] // CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B3]], i32 0, i32 [[TMP19]] -// CHECK5-NEXT: [[CALL9:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX]]) #[[ATTR5]] +// CHECK5-NEXT: [[CALL9:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX]]) #[[ATTR7]] // CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]] // CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[I5]], align 4 // CHECK5-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C4]], i32 0, i32 [[TMP20]] -// CHECK5-NEXT: [[CALL12:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX11]]) #[[ATTR5]] +// CHECK5-NEXT: [[CALL12:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX11]]) #[[ATTR7]] // CHECK5-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD10]], [[CALL12]] // CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[I5]], align 4 // CHECK5-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i32 0, i32 [[TMP21]] -// CHECK5-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX14]]) #[[ATTR5]] +// CHECK5-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX14]]) #[[ATTR7]] // CHECK5-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD13]], [[CALL15]] // CHECK5-NEXT: store i32 [[ADD16]], i32* [[TMP1]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] @@ -3615,7 +3615,7 @@ // CHECK5: omp.loop.exit: // CHECK5-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]]) +// CHECK5-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]]) // CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK5-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK5-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] @@ -3660,7 +3660,7 @@ // CHECK6-NEXT: store i32 [[TMP6]], i32* [[ARGC_CASTED]], align 4 // CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARGC_CASTED]], align 4 // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK6-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i32 [[TMP7]], [10 x i32]* [[TMP3]]) #[[ATTR1:[0-9]+]] +// CHECK6-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i32 [[TMP7]], [10 x i32]* [[TMP3]]) #[[ATTR5:[0-9]+]] // CHECK6-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK6-NEXT: ret void // CHECK6: worker.exit: @@ -3725,7 +3725,7 @@ // CHECK6-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK6-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK6-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK6-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] @@ -3814,7 +3814,7 @@ // CHECK6: omp.loop.exit: // CHECK6-NEXT: [[TMP51:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: [[TMP52:%.*]] = load i32, i32* [[TMP51]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP52]]) +// CHECK6-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP52]]) // CHECK6-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK6-NEXT: [[TMP54:%.*]] = icmp ne i32 [[TMP53]], 0 // CHECK6-NEXT: br i1 [[TMP54]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] @@ -3908,20 +3908,20 @@ // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK6-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I5]]) #[[ATTR5:[0-9]+]] -// CHECK6-NEXT: [[CALL7:%.*]] = call i32 @_Z3fooPi(i32* [[TMP1]]) #[[ATTR5]] +// CHECK6-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I5]]) #[[ATTR7:[0-9]+]] +// CHECK6-NEXT: [[CALL7:%.*]] = call i32 @_Z3fooPi(i32* [[TMP1]]) #[[ATTR7]] // CHECK6-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]] // CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B3]], i32 0, i32 [[TMP19]] -// CHECK6-NEXT: [[CALL9:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX]]) #[[ATTR5]] +// CHECK6-NEXT: [[CALL9:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX]]) #[[ATTR7]] // CHECK6-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]] // CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[I5]], align 4 // CHECK6-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C4]], i32 0, i32 [[TMP20]] -// CHECK6-NEXT: [[CALL12:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX11]]) #[[ATTR5]] +// CHECK6-NEXT: [[CALL12:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX11]]) #[[ATTR7]] // CHECK6-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD10]], [[CALL12]] // CHECK6-NEXT: [[TMP21:%.*]] = load i32, i32* [[I5]], align 4 // CHECK6-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i32 0, i32 [[TMP21]] -// CHECK6-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX14]]) #[[ATTR5]] +// CHECK6-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX14]]) #[[ATTR7]] // CHECK6-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD13]], [[CALL15]] // CHECK6-NEXT: store i32 [[ADD16]], i32* [[TMP1]], align 4 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] @@ -3938,7 +3938,7 @@ // CHECK6: omp.loop.exit: // CHECK6-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]]) +// CHECK6-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]]) // CHECK6-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK6-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK6-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp --- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp @@ -42,7 +42,7 @@ // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8:![0-9]+]] -// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR1:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -73,24 +73,24 @@ // CHECK1-NEXT: [[PARTIAL_SUM:%.*]] = call i8* @__kmpc_alloc_shared(i64 8) // CHECK1-NEXT: [[PARTIAL_SUM_ON_STACK:%.*]] = bitcast i8* [[PARTIAL_SUM]] to %"class.std::complex"* // CHECK1-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP1:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP1]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP1]]) #[[ATTR5]] // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP2:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP2]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP2]]) #[[ATTR5]] // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR5]] // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR5]] // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP5:%.*]] = bitcast i32* [[IB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP5]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP5]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] @@ -118,16 +118,16 @@ // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[IB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP14:%.*]] = bitcast float* [[REF_TMP]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP14]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP14]]) #[[ATTR5]] // CHECK1-NEXT: store float 0.000000e+00, float* [[REF_TMP]], align 4, !tbaa [[TBAA14:![0-9]+]] // CHECK1-NEXT: [[TMP15:%.*]] = bitcast float* [[REF_TMP2]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP15]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP15]]) #[[ATTR5]] // CHECK1-NEXT: store float 0.000000e+00, float* [[REF_TMP2]], align 4, !tbaa [[TBAA14]] -// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM_ON_STACK]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR9:[0-9]+]] +// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM_ON_STACK]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR11:[0-9]+]] // CHECK1-NEXT: [[TMP16:%.*]] = bitcast float* [[REF_TMP2]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP17:%.*]] = bitcast float* [[REF_TMP]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP18]], 4 // CHECK1-NEXT: store i32 [[MUL3]], i32* [[ISTART_ON_STACK]], align 4, !tbaa [[TBAA8]] @@ -157,19 +157,19 @@ // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]]) // CHECK1-NEXT: [[TMP28:%.*]] = bitcast i32* [[IB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP28]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP28]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP29:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP29]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP29]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP30:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP30]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP30]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP31:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP31]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP31]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP32:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP32]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP32]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP33:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR5]] // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[PARTIAL_SUM]], i64 8) // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[IEND]], i64 4) // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[ISTART]], i64 4) @@ -188,7 +188,7 @@ // CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load float*, float** [[__RE_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load float*, float** [[__IM_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR9]] +// CHECK1-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR11]] // CHECK1-NEXT: ret void // // @@ -227,17 +227,17 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[TMP2:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP6:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP9]], [[TMP10]] @@ -247,44 +247,44 @@ // CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP11:%.*]] = bitcast i32* [[I]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP11]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP11]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: store i32 [[TMP12]], i32* [[I]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP13:%.*]] = bitcast i32* [[I]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP13]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP13]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: [[TMP16:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR5]] // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP17:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP19:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP19]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP19]]) #[[ATTR5]] // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR5]] // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP21:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM5]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP21]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP21]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP22:%.*]] = bitcast float* [[REF_TMP]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP22]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP22]]) #[[ATTR5]] // CHECK1-NEXT: store float 0.000000e+00, float* [[REF_TMP]], align 4, !tbaa [[TBAA14]] // CHECK1-NEXT: [[TMP23:%.*]] = bitcast float* [[REF_TMP6]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP23]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP23]]) #[[ATTR5]] // CHECK1-NEXT: store float 0.000000e+00, float* [[REF_TMP6]], align 4, !tbaa [[TBAA14]] -// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR9]] +// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR11]] // CHECK1-NEXT: [[TMP24:%.*]] = bitcast float* [[REF_TMP6]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP24]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP24]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP25:%.*]] = bitcast float* [[REF_TMP]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP25]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP25]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP26:%.*]] = bitcast i32* [[I7]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP26]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP26]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP28]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) @@ -329,25 +329,25 @@ // CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP38]], [[MUL]] // CHECK1-NEXT: store i32 [[ADD13]], i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP40:%.*]] = bitcast %"class.std::complex"* [[REF_TMP14]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP40]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP40]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP41:%.*]] = bitcast float* [[REF_TMP15]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP41]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP41]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP42]] to float // CHECK1-NEXT: store float [[CONV]], float* [[REF_TMP15]], align 4, !tbaa [[TBAA14]] // CHECK1-NEXT: [[TMP43:%.*]] = bitcast float* [[REF_TMP16]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP43]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP43]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP44]] to float // CHECK1-NEXT: store float [[CONV17]], float* [[REF_TMP16]], align 4, !tbaa [[TBAA14]] -// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]], float* nonnull align 4 dereferenceable(4) [[REF_TMP15]], float* nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR9]] -// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR9]] +// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]], float* nonnull align 4 dereferenceable(4) [[REF_TMP15]], float* nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR11]] +// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR11]] // CHECK1-NEXT: [[TMP45:%.*]] = bitcast float* [[REF_TMP16]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP45]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP45]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP46:%.*]] = bitcast float* [[REF_TMP15]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP46]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP46]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex"* [[REF_TMP14]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP47]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP47]]) #[[ATTR5]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -382,32 +382,32 @@ // CHECK1-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1 // CHECK1-NEXT: br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK1: .omp.reduction.then: -// CHECK1-NEXT: [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR9]] +// CHECK1-NEXT: [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR11]] // CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]]) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK1: .omp.reduction.done: // CHECK1-NEXT: [[TMP62:%.*]] = bitcast i32* [[I7]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP63:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM5]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP63]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP63]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP64:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP65:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP66:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP67:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR5]] // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: [[TMP68:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP69:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP70:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP71:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR5]] // CHECK1-NEXT: ret void // // @@ -420,13 +420,13 @@ // CHECK1-NEXT: store %"class.std::complex"* [[__C]], %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR9]] +// CHECK1-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR11]] // CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA16:![0-9]+]] // CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[CALL]] // CHECK1-NEXT: store float [[ADD]], float* [[__RE_]], align 4, !tbaa [[TBAA16]] // CHECK1-NEXT: [[TMP2:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]]) #[[ATTR9]] +// CHECK1-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]]) #[[ATTR11]] // CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1 // CHECK1-NEXT: [[TMP3:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA18:![0-9]+]] // CHECK1-NEXT: [[ADD3:%.*]] = fadd float [[TMP3]], [[CALL2]] @@ -435,7 +435,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func -// CHECK1-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK1-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR6:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 @@ -485,7 +485,7 @@ // CHECK1: then: // CHECK1-NEXT: [[TMP35:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* // CHECK1-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR1]] +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR5]] // CHECK1-NEXT: br label [[IFCONT:%.*]] // CHECK1: else: // CHECK1-NEXT: br label [[IFCONT]] @@ -512,7 +512,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func -// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5]] { +// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR6]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -573,7 +573,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5]] { +// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR6]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -593,7 +593,7 @@ // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 2 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to %"class.std::complex"** // CHECK1-NEXT: [[TMP11:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[TMP10]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]], %"class.std::complex"* [[TMP11]]) #[[ATTR1]] +// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]], %"class.std::complex"* [[TMP11]]) #[[ATTR5]] // CHECK1-NEXT: ret void // // @@ -609,7 +609,7 @@ // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR1]] +// CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR5]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -640,24 +640,24 @@ // CHECK1-NEXT: [[PARTIAL_SUM:%.*]] = call i8* @__kmpc_alloc_shared(i64 16) // CHECK1-NEXT: [[PARTIAL_SUM_ON_STACK:%.*]] = bitcast i8* [[PARTIAL_SUM]] to %"class.std::complex.0"* // CHECK1-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP1:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP1]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP1]]) #[[ATTR5]] // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP2:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP2]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP2]]) #[[ATTR5]] // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR5]] // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR5]] // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP5:%.*]] = bitcast i32* [[IB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP5]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP5]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] @@ -685,16 +685,16 @@ // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[IB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP14:%.*]] = bitcast double* [[REF_TMP]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP14]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP14]]) #[[ATTR5]] // CHECK1-NEXT: store double 0.000000e+00, double* [[REF_TMP]], align 8, !tbaa [[TBAA22:![0-9]+]] // CHECK1-NEXT: [[TMP15:%.*]] = bitcast double* [[REF_TMP2]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP15]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP15]]) #[[ATTR5]] // CHECK1-NEXT: store double 0.000000e+00, double* [[REF_TMP2]], align 8, !tbaa [[TBAA22]] -// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM_ON_STACK]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR9]] +// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM_ON_STACK]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR11]] // CHECK1-NEXT: [[TMP16:%.*]] = bitcast double* [[REF_TMP2]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP16]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP16]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP17:%.*]] = bitcast double* [[REF_TMP]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP17]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP17]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP18]], 4 // CHECK1-NEXT: store i32 [[MUL3]], i32* [[ISTART_ON_STACK]], align 4, !tbaa [[TBAA8]] @@ -724,19 +724,19 @@ // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]]) // CHECK1-NEXT: [[TMP28:%.*]] = bitcast i32* [[IB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP28]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP28]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP29:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP29]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP29]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP30:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP30]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP30]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP31:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP31]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP31]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP32:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP32]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP32]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP33:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR5]] // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[PARTIAL_SUM]], i64 16) // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[IEND]], i64 4) // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[ISTART]], i64 4) @@ -755,7 +755,7 @@ // CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[__RE_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[__IM_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZNSt7complexIdEC2ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS1]], double* nonnull align 8 dereferenceable(8) [[TMP0]], double* nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR9]] +// CHECK1-NEXT: call void @_ZNSt7complexIdEC2ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS1]], double* nonnull align 8 dereferenceable(8) [[TMP0]], double* nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR11]] // CHECK1-NEXT: ret void // // @@ -794,17 +794,17 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[TMP2:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP6:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP9]], [[TMP10]] @@ -814,44 +814,44 @@ // CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP11:%.*]] = bitcast i32* [[I]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP11]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP11]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: store i32 [[TMP12]], i32* [[I]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP13:%.*]] = bitcast i32* [[I]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP13]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP13]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: [[TMP16:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR5]] // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP17:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP19:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP19]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP19]]) #[[ATTR5]] // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR5]] // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP21:%.*]] = bitcast %"class.std::complex.0"* [[PARTIAL_SUM5]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP21]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP21]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP22:%.*]] = bitcast double* [[REF_TMP]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP22]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP22]]) #[[ATTR5]] // CHECK1-NEXT: store double 0.000000e+00, double* [[REF_TMP]], align 8, !tbaa [[TBAA22]] // CHECK1-NEXT: [[TMP23:%.*]] = bitcast double* [[REF_TMP6]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP23]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP23]]) #[[ATTR5]] // CHECK1-NEXT: store double 0.000000e+00, double* [[REF_TMP6]], align 8, !tbaa [[TBAA22]] -// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR9]] +// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR11]] // CHECK1-NEXT: [[TMP24:%.*]] = bitcast double* [[REF_TMP6]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP24]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP24]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP25:%.*]] = bitcast double* [[REF_TMP]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP25]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP25]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP26:%.*]] = bitcast i32* [[I7]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP26]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP26]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB3]], i32 [[TMP28]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) @@ -896,25 +896,25 @@ // CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP38]], [[MUL]] // CHECK1-NEXT: store i32 [[ADD13]], i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP40:%.*]] = bitcast %"class.std::complex.0"* [[REF_TMP14]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP40]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP40]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP41:%.*]] = bitcast double* [[REF_TMP15]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP41]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP41]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP42]] to double // CHECK1-NEXT: store double [[CONV]], double* [[REF_TMP15]], align 8, !tbaa [[TBAA22]] // CHECK1-NEXT: [[TMP43:%.*]] = bitcast double* [[REF_TMP16]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP43]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP43]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP44]] to double // CHECK1-NEXT: store double [[CONV17]], double* [[REF_TMP16]], align 8, !tbaa [[TBAA22]] -// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]], double* nonnull align 8 dereferenceable(8) [[REF_TMP15]], double* nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR9]] -// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR9]] +// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]], double* nonnull align 8 dereferenceable(8) [[REF_TMP15]], double* nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR11]] +// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR11]] // CHECK1-NEXT: [[TMP45:%.*]] = bitcast double* [[REF_TMP16]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP45]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP45]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP46:%.*]] = bitcast double* [[REF_TMP15]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP46]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP46]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex.0"* [[REF_TMP14]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP47]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP47]]) #[[ATTR5]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -949,32 +949,32 @@ // CHECK1-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1 // CHECK1-NEXT: br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK1: .omp.reduction.then: -// CHECK1-NEXT: [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR9]] +// CHECK1-NEXT: [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR11]] // CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]]) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK1: .omp.reduction.done: // CHECK1-NEXT: [[TMP62:%.*]] = bitcast i32* [[I7]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP63:%.*]] = bitcast %"class.std::complex.0"* [[PARTIAL_SUM5]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP63]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP63]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP64:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP65:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP66:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP67:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR5]] // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: [[TMP68:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP69:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP70:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR5]] // CHECK1-NEXT: [[TMP71:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR1]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR5]] // CHECK1-NEXT: ret void // // @@ -987,13 +987,13 @@ // CHECK1-NEXT: store %"class.std::complex.0"* [[__C]], %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR9]] +// CHECK1-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR11]] // CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = load double, double* [[__RE_]], align 8, !tbaa [[TBAA24:![0-9]+]] // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[CALL]] // CHECK1-NEXT: store double [[ADD]], double* [[__RE_]], align 8, !tbaa [[TBAA24]] // CHECK1-NEXT: [[TMP2:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR9]] +// CHECK1-NEXT: [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR11]] // CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 1 // CHECK1-NEXT: [[TMP3:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA26:![0-9]+]] // CHECK1-NEXT: [[ADD3:%.*]] = fadd double [[TMP3]], [[CALL2]] @@ -1002,7 +1002,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func5 -// CHECK1-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR5]] { +// CHECK1-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR6]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 @@ -1066,7 +1066,7 @@ // CHECK1: then: // CHECK1-NEXT: [[TMP43:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* // CHECK1-NEXT: [[TMP44:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func4"(i8* [[TMP43]], i8* [[TMP44]]) #[[ATTR1]] +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func4"(i8* [[TMP43]], i8* [[TMP44]]) #[[ATTR5]] // CHECK1-NEXT: br label [[IFCONT:%.*]] // CHECK1: else: // CHECK1-NEXT: br label [[IFCONT]] @@ -1093,7 +1093,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func6 -// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5]] { +// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR6]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -1154,7 +1154,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper -// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5]] { +// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR6]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -1174,7 +1174,7 @@ // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 2 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to %"class.std::complex.0"** // CHECK1-NEXT: [[TMP11:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[TMP10]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]], %"class.std::complex.0"* [[TMP11]]) #[[ATTR1]] +// CHECK1-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]], %"class.std::complex.0"* [[TMP11]]) #[[ATTR5]] // CHECK1-NEXT: ret void // // @@ -1276,7 +1276,7 @@ // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8:![0-9]+]] -// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR1:[0-9]+]] +// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -1307,24 +1307,24 @@ // CHECK2-NEXT: [[PARTIAL_SUM:%.*]] = call i8* @__kmpc_alloc_shared(i64 8) // CHECK2-NEXT: [[PARTIAL_SUM_ON_STACK:%.*]] = bitcast i8* [[PARTIAL_SUM]] to %"class.std::complex"* // CHECK2-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP1:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP1]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP1]]) #[[ATTR5]] // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP2:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP2]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP2]]) #[[ATTR5]] // CHECK2-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR5]] // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR5]] // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP5:%.*]] = bitcast i32* [[IB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP5]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP5]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA8]] -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 99 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] @@ -1352,16 +1352,16 @@ // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[IB]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP14:%.*]] = bitcast float* [[REF_TMP]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP14]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP14]]) #[[ATTR5]] // CHECK2-NEXT: store float 0.000000e+00, float* [[REF_TMP]], align 4, !tbaa [[TBAA14:![0-9]+]] // CHECK2-NEXT: [[TMP15:%.*]] = bitcast float* [[REF_TMP2]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP15]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP15]]) #[[ATTR5]] // CHECK2-NEXT: store float 0.000000e+00, float* [[REF_TMP2]], align 4, !tbaa [[TBAA14]] -// CHECK2-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM_ON_STACK]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR9:[0-9]+]] +// CHECK2-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM_ON_STACK]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR11:[0-9]+]] // CHECK2-NEXT: [[TMP16:%.*]] = bitcast float* [[REF_TMP2]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP17:%.*]] = bitcast float* [[REF_TMP]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP18]], 4 // CHECK2-NEXT: store i32 [[MUL3]], i32* [[ISTART_ON_STACK]], align 4, !tbaa [[TBAA8]] @@ -1391,19 +1391,19 @@ // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]]) // CHECK2-NEXT: [[TMP28:%.*]] = bitcast i32* [[IB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP28]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP28]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP29:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP29]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP29]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP30:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP30]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP30]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP31:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP31]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP31]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP32:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP32]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP32]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP33:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR5]] // CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[PARTIAL_SUM]], i64 8) // CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[IEND]], i64 4) // CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[ISTART]], i64 4) @@ -1422,7 +1422,7 @@ // CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load float*, float** [[__RE_ADDR]], align 8 // CHECK2-NEXT: [[TMP1:%.*]] = load float*, float** [[__IM_ADDR]], align 8 -// CHECK2-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR9]] +// CHECK2-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR11]] // CHECK2-NEXT: ret void // // @@ -1461,17 +1461,17 @@ // CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK2-NEXT: [[TMP2:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK2-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP6:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[SUB:%.*]] = sub i32 [[TMP9]], [[TMP10]] @@ -1481,44 +1481,44 @@ // CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP11:%.*]] = bitcast i32* [[I]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP11]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP11]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: store i32 [[TMP12]], i32* [[I]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP13:%.*]] = bitcast i32* [[I]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP13]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP13]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP14]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: [[TMP16:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR5]] // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP17:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP19:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP19]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP19]]) #[[ATTR5]] // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR5]] // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP21:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM5]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP21]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP21]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP22:%.*]] = bitcast float* [[REF_TMP]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP22]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP22]]) #[[ATTR5]] // CHECK2-NEXT: store float 0.000000e+00, float* [[REF_TMP]], align 4, !tbaa [[TBAA14]] // CHECK2-NEXT: [[TMP23:%.*]] = bitcast float* [[REF_TMP6]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP23]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP23]]) #[[ATTR5]] // CHECK2-NEXT: store float 0.000000e+00, float* [[REF_TMP6]], align 4, !tbaa [[TBAA14]] -// CHECK2-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR9]] +// CHECK2-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR11]] // CHECK2-NEXT: [[TMP24:%.*]] = bitcast float* [[REF_TMP6]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP24]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP24]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP25:%.*]] = bitcast float* [[REF_TMP]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP25]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP25]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP26:%.*]] = bitcast i32* [[I7]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP26]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP26]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP28]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) @@ -1563,25 +1563,25 @@ // CHECK2-NEXT: [[ADD13:%.*]] = add i32 [[TMP38]], [[MUL]] // CHECK2-NEXT: store i32 [[ADD13]], i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP40:%.*]] = bitcast %"class.std::complex"* [[REF_TMP14]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP40]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP40]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP41:%.*]] = bitcast float* [[REF_TMP15]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP41]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP41]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP42]] to float // CHECK2-NEXT: store float [[CONV]], float* [[REF_TMP15]], align 4, !tbaa [[TBAA14]] // CHECK2-NEXT: [[TMP43:%.*]] = bitcast float* [[REF_TMP16]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP43]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP43]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP44]] to float // CHECK2-NEXT: store float [[CONV17]], float* [[REF_TMP16]], align 4, !tbaa [[TBAA14]] -// CHECK2-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]], float* nonnull align 4 dereferenceable(4) [[REF_TMP15]], float* nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR9]] -// CHECK2-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR9]] +// CHECK2-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]], float* nonnull align 4 dereferenceable(4) [[REF_TMP15]], float* nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR11]] +// CHECK2-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR11]] // CHECK2-NEXT: [[TMP45:%.*]] = bitcast float* [[REF_TMP16]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP45]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP45]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP46:%.*]] = bitcast float* [[REF_TMP15]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP46]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP46]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex"* [[REF_TMP14]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP47]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP47]]) #[[ATTR5]] // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1616,32 +1616,32 @@ // CHECK2-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1 // CHECK2-NEXT: br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK2: .omp.reduction.then: -// CHECK2-NEXT: [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR9]] +// CHECK2-NEXT: [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR11]] // CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]]) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK2: .omp.reduction.done: // CHECK2-NEXT: [[TMP62:%.*]] = bitcast i32* [[I7]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP63:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM5]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP63]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP63]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP64:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP65:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP66:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP67:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR5]] // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: [[TMP68:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP69:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP70:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP71:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR5]] // CHECK2-NEXT: ret void // // @@ -1654,13 +1654,13 @@ // CHECK2-NEXT: store %"class.std::complex"* [[__C]], %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK2-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR9]] +// CHECK2-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR11]] // CHECK2-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0 // CHECK2-NEXT: [[TMP1:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA16:![0-9]+]] // CHECK2-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[CALL]] // CHECK2-NEXT: store float [[ADD]], float* [[__RE_]], align 4, !tbaa [[TBAA16]] // CHECK2-NEXT: [[TMP2:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK2-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]]) #[[ATTR9]] +// CHECK2-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]]) #[[ATTR11]] // CHECK2-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1 // CHECK2-NEXT: [[TMP3:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA18:![0-9]+]] // CHECK2-NEXT: [[ADD3:%.*]] = fadd float [[TMP3]], [[CALL2]] @@ -1669,7 +1669,7 @@ // // // CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func -// CHECK2-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK2-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR6:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 @@ -1719,7 +1719,7 @@ // CHECK2: then: // CHECK2-NEXT: [[TMP35:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* // CHECK2-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR1]] +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR5]] // CHECK2-NEXT: br label [[IFCONT:%.*]] // CHECK2: else: // CHECK2-NEXT: br label [[IFCONT]] @@ -1746,7 +1746,7 @@ // // // CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func -// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5]] { +// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR6]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -1807,7 +1807,7 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5]] { +// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR6]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -1827,7 +1827,7 @@ // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 2 // CHECK2-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to %"class.std::complex"** // CHECK2-NEXT: [[TMP11:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[TMP10]], align 8, !tbaa [[TBAA12]] -// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]], %"class.std::complex"* [[TMP11]]) #[[ATTR1]] +// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]], %"class.std::complex"* [[TMP11]]) #[[ATTR5]] // CHECK2-NEXT: ret void // // @@ -1843,7 +1843,7 @@ // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8]] -// CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR1]] +// CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR5]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -1874,24 +1874,24 @@ // CHECK2-NEXT: [[PARTIAL_SUM:%.*]] = call i8* @__kmpc_alloc_shared(i64 16) // CHECK2-NEXT: [[PARTIAL_SUM_ON_STACK:%.*]] = bitcast i8* [[PARTIAL_SUM]] to %"class.std::complex.0"* // CHECK2-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP1:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP1]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP1]]) #[[ATTR5]] // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP2:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP2]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP2]]) #[[ATTR5]] // CHECK2-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR5]] // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR5]] // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP5:%.*]] = bitcast i32* [[IB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP5]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP5]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA8]] -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 99 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] @@ -1919,16 +1919,16 @@ // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[IB]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP14:%.*]] = bitcast double* [[REF_TMP]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP14]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP14]]) #[[ATTR5]] // CHECK2-NEXT: store double 0.000000e+00, double* [[REF_TMP]], align 8, !tbaa [[TBAA22:![0-9]+]] // CHECK2-NEXT: [[TMP15:%.*]] = bitcast double* [[REF_TMP2]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP15]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP15]]) #[[ATTR5]] // CHECK2-NEXT: store double 0.000000e+00, double* [[REF_TMP2]], align 8, !tbaa [[TBAA22]] -// CHECK2-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM_ON_STACK]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR9]] +// CHECK2-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM_ON_STACK]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR11]] // CHECK2-NEXT: [[TMP16:%.*]] = bitcast double* [[REF_TMP2]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP16]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP16]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP17:%.*]] = bitcast double* [[REF_TMP]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP17]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP17]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP18]], 4 // CHECK2-NEXT: store i32 [[MUL3]], i32* [[ISTART_ON_STACK]], align 4, !tbaa [[TBAA8]] @@ -1958,19 +1958,19 @@ // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]]) // CHECK2-NEXT: [[TMP28:%.*]] = bitcast i32* [[IB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP28]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP28]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP29:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP29]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP29]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP30:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP30]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP30]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP31:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP31]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP31]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP32:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP32]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP32]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP33:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR5]] // CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[PARTIAL_SUM]], i64 16) // CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[IEND]], i64 4) // CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[ISTART]], i64 4) @@ -1989,7 +1989,7 @@ // CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load double*, double** [[__RE_ADDR]], align 8 // CHECK2-NEXT: [[TMP1:%.*]] = load double*, double** [[__IM_ADDR]], align 8 -// CHECK2-NEXT: call void @_ZNSt7complexIdEC2ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS1]], double* nonnull align 8 dereferenceable(8) [[TMP0]], double* nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR9]] +// CHECK2-NEXT: call void @_ZNSt7complexIdEC2ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS1]], double* nonnull align 8 dereferenceable(8) [[TMP0]], double* nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR11]] // CHECK2-NEXT: ret void // // @@ -2028,17 +2028,17 @@ // CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK2-NEXT: [[TMP2:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK2-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP6:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[SUB:%.*]] = sub i32 [[TMP9]], [[TMP10]] @@ -2048,44 +2048,44 @@ // CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP11:%.*]] = bitcast i32* [[I]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP11]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP11]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: store i32 [[TMP12]], i32* [[I]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP13:%.*]] = bitcast i32* [[I]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP13]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP13]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP14]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: [[TMP16:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR5]] // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP17:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP19:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP19]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP19]]) #[[ATTR5]] // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR5]] // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP21:%.*]] = bitcast %"class.std::complex.0"* [[PARTIAL_SUM5]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP21]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP21]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP22:%.*]] = bitcast double* [[REF_TMP]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP22]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP22]]) #[[ATTR5]] // CHECK2-NEXT: store double 0.000000e+00, double* [[REF_TMP]], align 8, !tbaa [[TBAA22]] // CHECK2-NEXT: [[TMP23:%.*]] = bitcast double* [[REF_TMP6]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP23]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP23]]) #[[ATTR5]] // CHECK2-NEXT: store double 0.000000e+00, double* [[REF_TMP6]], align 8, !tbaa [[TBAA22]] -// CHECK2-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR9]] +// CHECK2-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR11]] // CHECK2-NEXT: [[TMP24:%.*]] = bitcast double* [[REF_TMP6]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP24]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP24]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP25:%.*]] = bitcast double* [[REF_TMP]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP25]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP25]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP26:%.*]] = bitcast i32* [[I7]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP26]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP26]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB3]], i32 [[TMP28]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) @@ -2130,25 +2130,25 @@ // CHECK2-NEXT: [[ADD13:%.*]] = add i32 [[TMP38]], [[MUL]] // CHECK2-NEXT: store i32 [[ADD13]], i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP40:%.*]] = bitcast %"class.std::complex.0"* [[REF_TMP14]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP40]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP40]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP41:%.*]] = bitcast double* [[REF_TMP15]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP41]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP41]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP42]] to double // CHECK2-NEXT: store double [[CONV]], double* [[REF_TMP15]], align 8, !tbaa [[TBAA22]] // CHECK2-NEXT: [[TMP43:%.*]] = bitcast double* [[REF_TMP16]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP43]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP43]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP44]] to double // CHECK2-NEXT: store double [[CONV17]], double* [[REF_TMP16]], align 8, !tbaa [[TBAA22]] -// CHECK2-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]], double* nonnull align 8 dereferenceable(8) [[REF_TMP15]], double* nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR9]] -// CHECK2-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR9]] +// CHECK2-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]], double* nonnull align 8 dereferenceable(8) [[REF_TMP15]], double* nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR11]] +// CHECK2-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR11]] // CHECK2-NEXT: [[TMP45:%.*]] = bitcast double* [[REF_TMP16]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP45]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP45]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP46:%.*]] = bitcast double* [[REF_TMP15]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP46]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP46]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex.0"* [[REF_TMP14]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP47]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP47]]) #[[ATTR5]] // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2183,32 +2183,32 @@ // CHECK2-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1 // CHECK2-NEXT: br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK2: .omp.reduction.then: -// CHECK2-NEXT: [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR9]] +// CHECK2-NEXT: [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR11]] // CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]]) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK2: .omp.reduction.done: // CHECK2-NEXT: [[TMP62:%.*]] = bitcast i32* [[I7]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP63:%.*]] = bitcast %"class.std::complex.0"* [[PARTIAL_SUM5]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP63]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP63]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP64:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP65:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP66:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP67:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR5]] // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: [[TMP68:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP69:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP70:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR5]] // CHECK2-NEXT: [[TMP71:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR1]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR5]] // CHECK2-NEXT: ret void // // @@ -2221,13 +2221,13 @@ // CHECK2-NEXT: store %"class.std::complex.0"* [[__C]], %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK2-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR9]] +// CHECK2-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR11]] // CHECK2-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 0 // CHECK2-NEXT: [[TMP1:%.*]] = load double, double* [[__RE_]], align 8, !tbaa [[TBAA24:![0-9]+]] // CHECK2-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[CALL]] // CHECK2-NEXT: store double [[ADD]], double* [[__RE_]], align 8, !tbaa [[TBAA24]] // CHECK2-NEXT: [[TMP2:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK2-NEXT: [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR9]] +// CHECK2-NEXT: [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR11]] // CHECK2-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 1 // CHECK2-NEXT: [[TMP3:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA26:![0-9]+]] // CHECK2-NEXT: [[ADD3:%.*]] = fadd double [[TMP3]], [[CALL2]] @@ -2236,7 +2236,7 @@ // // // CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func5 -// CHECK2-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR5]] { +// CHECK2-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR6]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 @@ -2300,7 +2300,7 @@ // CHECK2: then: // CHECK2-NEXT: [[TMP43:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* // CHECK2-NEXT: [[TMP44:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func4"(i8* [[TMP43]], i8* [[TMP44]]) #[[ATTR1]] +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func4"(i8* [[TMP43]], i8* [[TMP44]]) #[[ATTR5]] // CHECK2-NEXT: br label [[IFCONT:%.*]] // CHECK2: else: // CHECK2-NEXT: br label [[IFCONT]] @@ -2327,7 +2327,7 @@ // // // CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func6 -// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5]] { +// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR6]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -2388,7 +2388,7 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper -// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5]] { +// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR6]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -2408,7 +2408,7 @@ // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 2 // CHECK2-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to %"class.std::complex.0"** // CHECK2-NEXT: [[TMP11:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[TMP10]], align 8, !tbaa [[TBAA12]] -// CHECK2-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]], %"class.std::complex.0"* [[TMP11]]) #[[ATTR1]] +// CHECK2-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]], %"class.std::complex.0"* [[TMP11]]) #[[ATTR5]] // CHECK2-NEXT: ret void // // @@ -2510,7 +2510,7 @@ // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8:![0-9]+]] -// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR1:[0-9]+]] +// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -2541,24 +2541,24 @@ // CHECK3-NEXT: [[PARTIAL_SUM:%.*]] = call i8* @__kmpc_alloc_shared(i64 8) // CHECK3-NEXT: [[PARTIAL_SUM_ON_STACK:%.*]] = bitcast i8* [[PARTIAL_SUM]] to %"class.std::complex"* // CHECK3-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP1:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP1]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP1]]) #[[ATTR5]] // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP2:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP2]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP2]]) #[[ATTR5]] // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR5]] // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR5]] // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP5:%.*]] = bitcast i32* [[IB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP5]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP5]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA8]] -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] @@ -2586,16 +2586,16 @@ // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[IB]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP14:%.*]] = bitcast float* [[REF_TMP]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP14]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP14]]) #[[ATTR5]] // CHECK3-NEXT: store float 0.000000e+00, float* [[REF_TMP]], align 4, !tbaa [[TBAA14:![0-9]+]] // CHECK3-NEXT: [[TMP15:%.*]] = bitcast float* [[REF_TMP2]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP15]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP15]]) #[[ATTR5]] // CHECK3-NEXT: store float 0.000000e+00, float* [[REF_TMP2]], align 4, !tbaa [[TBAA14]] -// CHECK3-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM_ON_STACK]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR9:[0-9]+]] +// CHECK3-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM_ON_STACK]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR11:[0-9]+]] // CHECK3-NEXT: [[TMP16:%.*]] = bitcast float* [[REF_TMP2]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP17:%.*]] = bitcast float* [[REF_TMP]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP18]], 4 // CHECK3-NEXT: store i32 [[MUL3]], i32* [[ISTART_ON_STACK]], align 4, !tbaa [[TBAA8]] @@ -2625,19 +2625,19 @@ // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]]) // CHECK3-NEXT: [[TMP28:%.*]] = bitcast i32* [[IB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP28]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP28]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP29:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP29]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP29]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP30:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP30]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP30]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP31:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP31]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP31]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP32:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP32]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP32]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP33:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR5]] // CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[PARTIAL_SUM]], i64 8) // CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[IEND]], i64 4) // CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[ISTART]], i64 4) @@ -2656,7 +2656,7 @@ // CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = load float*, float** [[__RE_ADDR]], align 8 // CHECK3-NEXT: [[TMP1:%.*]] = load float*, float** [[__IM_ADDR]], align 8 -// CHECK3-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR9]] +// CHECK3-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR11]] // CHECK3-NEXT: ret void // // @@ -2695,17 +2695,17 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK3-NEXT: [[TMP2:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK3-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP6:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP8:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[SUB:%.*]] = sub i32 [[TMP9]], [[TMP10]] @@ -2715,44 +2715,44 @@ // CHECK3-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP11:%.*]] = bitcast i32* [[I]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP11]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP11]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: store i32 [[TMP12]], i32* [[I]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP13:%.*]] = bitcast i32* [[I]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP13]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP13]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: [[TMP16:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR5]] // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP17:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP19:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP19]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP19]]) #[[ATTR5]] // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR5]] // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP21:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM5]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP21]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP21]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP22:%.*]] = bitcast float* [[REF_TMP]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP22]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP22]]) #[[ATTR5]] // CHECK3-NEXT: store float 0.000000e+00, float* [[REF_TMP]], align 4, !tbaa [[TBAA14]] // CHECK3-NEXT: [[TMP23:%.*]] = bitcast float* [[REF_TMP6]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP23]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP23]]) #[[ATTR5]] // CHECK3-NEXT: store float 0.000000e+00, float* [[REF_TMP6]], align 4, !tbaa [[TBAA14]] -// CHECK3-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR9]] +// CHECK3-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR11]] // CHECK3-NEXT: [[TMP24:%.*]] = bitcast float* [[REF_TMP6]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP24]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP24]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP25:%.*]] = bitcast float* [[REF_TMP]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP25]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP25]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP26:%.*]] = bitcast i32* [[I7]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP26]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP26]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP28]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) @@ -2797,25 +2797,25 @@ // CHECK3-NEXT: [[ADD13:%.*]] = add i32 [[TMP38]], [[MUL]] // CHECK3-NEXT: store i32 [[ADD13]], i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP40:%.*]] = bitcast %"class.std::complex"* [[REF_TMP14]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP40]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP40]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP41:%.*]] = bitcast float* [[REF_TMP15]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP41]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP41]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP42]] to float // CHECK3-NEXT: store float [[CONV]], float* [[REF_TMP15]], align 4, !tbaa [[TBAA14]] // CHECK3-NEXT: [[TMP43:%.*]] = bitcast float* [[REF_TMP16]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP43]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP43]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP44]] to float // CHECK3-NEXT: store float [[CONV17]], float* [[REF_TMP16]], align 4, !tbaa [[TBAA14]] -// CHECK3-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]], float* nonnull align 4 dereferenceable(4) [[REF_TMP15]], float* nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR9]] -// CHECK3-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR9]] +// CHECK3-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]], float* nonnull align 4 dereferenceable(4) [[REF_TMP15]], float* nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR11]] +// CHECK3-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR11]] // CHECK3-NEXT: [[TMP45:%.*]] = bitcast float* [[REF_TMP16]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP45]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP45]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP46:%.*]] = bitcast float* [[REF_TMP15]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP46]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP46]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex"* [[REF_TMP14]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP47]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP47]]) #[[ATTR5]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2850,32 +2850,32 @@ // CHECK3-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1 // CHECK3-NEXT: br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK3: .omp.reduction.then: -// CHECK3-NEXT: [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR9]] +// CHECK3-NEXT: [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR11]] // CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]]) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK3: .omp.reduction.done: // CHECK3-NEXT: [[TMP62:%.*]] = bitcast i32* [[I7]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP63:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM5]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP63]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP63]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP64:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP65:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP66:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP67:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR5]] // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: [[TMP68:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP69:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP70:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP71:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR5]] // CHECK3-NEXT: ret void // // @@ -2888,13 +2888,13 @@ // CHECK3-NEXT: store %"class.std::complex"* [[__C]], %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK3-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR9]] +// CHECK3-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR11]] // CHECK3-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP1:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA16:![0-9]+]] // CHECK3-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[CALL]] // CHECK3-NEXT: store float [[ADD]], float* [[__RE_]], align 4, !tbaa [[TBAA16]] // CHECK3-NEXT: [[TMP2:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK3-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]]) #[[ATTR9]] +// CHECK3-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]]) #[[ATTR11]] // CHECK3-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1 // CHECK3-NEXT: [[TMP3:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA18:![0-9]+]] // CHECK3-NEXT: [[ADD3:%.*]] = fadd float [[TMP3]], [[CALL2]] @@ -2903,7 +2903,7 @@ // // // CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func -// CHECK3-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK3-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR6:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 @@ -2953,7 +2953,7 @@ // CHECK3: then: // CHECK3-NEXT: [[TMP35:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* // CHECK3-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR1]] +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR5]] // CHECK3-NEXT: br label [[IFCONT:%.*]] // CHECK3: else: // CHECK3-NEXT: br label [[IFCONT]] @@ -2980,7 +2980,7 @@ // // // CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func -// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5]] { +// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR6]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -3041,7 +3041,7 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5]] { +// CHECK3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR6]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -3061,7 +3061,7 @@ // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 2 // CHECK3-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to %"class.std::complex"** // CHECK3-NEXT: [[TMP11:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[TMP10]], align 8, !tbaa [[TBAA12]] -// CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]], %"class.std::complex"* [[TMP11]]) #[[ATTR1]] +// CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]], %"class.std::complex"* [[TMP11]]) #[[ATTR5]] // CHECK3-NEXT: ret void // // @@ -3077,7 +3077,7 @@ // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8]] -// CHECK3-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR1]] +// CHECK3-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR5]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -3108,24 +3108,24 @@ // CHECK3-NEXT: [[PARTIAL_SUM:%.*]] = call i8* @__kmpc_alloc_shared(i64 16) // CHECK3-NEXT: [[PARTIAL_SUM_ON_STACK:%.*]] = bitcast i8* [[PARTIAL_SUM]] to %"class.std::complex.0"* // CHECK3-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP1:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP1]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP1]]) #[[ATTR5]] // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP2:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP2]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP2]]) #[[ATTR5]] // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR5]] // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR5]] // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP5:%.*]] = bitcast i32* [[IB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP5]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP5]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA8]] -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] @@ -3153,16 +3153,16 @@ // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[IB]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP14:%.*]] = bitcast double* [[REF_TMP]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP14]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP14]]) #[[ATTR5]] // CHECK3-NEXT: store double 0.000000e+00, double* [[REF_TMP]], align 8, !tbaa [[TBAA22:![0-9]+]] // CHECK3-NEXT: [[TMP15:%.*]] = bitcast double* [[REF_TMP2]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP15]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP15]]) #[[ATTR5]] // CHECK3-NEXT: store double 0.000000e+00, double* [[REF_TMP2]], align 8, !tbaa [[TBAA22]] -// CHECK3-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM_ON_STACK]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR9]] +// CHECK3-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM_ON_STACK]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR11]] // CHECK3-NEXT: [[TMP16:%.*]] = bitcast double* [[REF_TMP2]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP16]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP16]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP17:%.*]] = bitcast double* [[REF_TMP]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP17]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP17]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP18]], 4 // CHECK3-NEXT: store i32 [[MUL3]], i32* [[ISTART_ON_STACK]], align 4, !tbaa [[TBAA8]] @@ -3192,19 +3192,19 @@ // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]]) // CHECK3-NEXT: [[TMP28:%.*]] = bitcast i32* [[IB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP28]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP28]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP29:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP29]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP29]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP30:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP30]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP30]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP31:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP31]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP31]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP32:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP32]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP32]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP33:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR5]] // CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[PARTIAL_SUM]], i64 16) // CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[IEND]], i64 4) // CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[ISTART]], i64 4) @@ -3223,7 +3223,7 @@ // CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[__RE_ADDR]], align 8 // CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[__IM_ADDR]], align 8 -// CHECK3-NEXT: call void @_ZNSt7complexIdEC2ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS1]], double* nonnull align 8 dereferenceable(8) [[TMP0]], double* nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR9]] +// CHECK3-NEXT: call void @_ZNSt7complexIdEC2ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS1]], double* nonnull align 8 dereferenceable(8) [[TMP0]], double* nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR11]] // CHECK3-NEXT: ret void // // @@ -3262,17 +3262,17 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK3-NEXT: [[TMP2:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK3-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP6:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP8:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[SUB:%.*]] = sub i32 [[TMP9]], [[TMP10]] @@ -3282,44 +3282,44 @@ // CHECK3-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP11:%.*]] = bitcast i32* [[I]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP11]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP11]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: store i32 [[TMP12]], i32* [[I]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP13:%.*]] = bitcast i32* [[I]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP13]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP13]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: [[TMP16:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR5]] // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP17:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP19:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP19]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP19]]) #[[ATTR5]] // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR5]] // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP21:%.*]] = bitcast %"class.std::complex.0"* [[PARTIAL_SUM5]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP21]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP21]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP22:%.*]] = bitcast double* [[REF_TMP]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP22]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP22]]) #[[ATTR5]] // CHECK3-NEXT: store double 0.000000e+00, double* [[REF_TMP]], align 8, !tbaa [[TBAA22]] // CHECK3-NEXT: [[TMP23:%.*]] = bitcast double* [[REF_TMP6]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP23]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP23]]) #[[ATTR5]] // CHECK3-NEXT: store double 0.000000e+00, double* [[REF_TMP6]], align 8, !tbaa [[TBAA22]] -// CHECK3-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR9]] +// CHECK3-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR11]] // CHECK3-NEXT: [[TMP24:%.*]] = bitcast double* [[REF_TMP6]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP24]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP24]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP25:%.*]] = bitcast double* [[REF_TMP]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP25]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP25]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP26:%.*]] = bitcast i32* [[I7]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP26]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP26]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB3]], i32 [[TMP28]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) @@ -3364,25 +3364,25 @@ // CHECK3-NEXT: [[ADD13:%.*]] = add i32 [[TMP38]], [[MUL]] // CHECK3-NEXT: store i32 [[ADD13]], i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP40:%.*]] = bitcast %"class.std::complex.0"* [[REF_TMP14]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP40]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP40]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP41:%.*]] = bitcast double* [[REF_TMP15]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP41]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP41]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP42]] to double // CHECK3-NEXT: store double [[CONV]], double* [[REF_TMP15]], align 8, !tbaa [[TBAA22]] // CHECK3-NEXT: [[TMP43:%.*]] = bitcast double* [[REF_TMP16]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP43]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP43]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP44]] to double // CHECK3-NEXT: store double [[CONV17]], double* [[REF_TMP16]], align 8, !tbaa [[TBAA22]] -// CHECK3-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]], double* nonnull align 8 dereferenceable(8) [[REF_TMP15]], double* nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR9]] -// CHECK3-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR9]] +// CHECK3-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]], double* nonnull align 8 dereferenceable(8) [[REF_TMP15]], double* nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR11]] +// CHECK3-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR11]] // CHECK3-NEXT: [[TMP45:%.*]] = bitcast double* [[REF_TMP16]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP45]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP45]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP46:%.*]] = bitcast double* [[REF_TMP15]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP46]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP46]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex.0"* [[REF_TMP14]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP47]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP47]]) #[[ATTR5]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -3417,32 +3417,32 @@ // CHECK3-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1 // CHECK3-NEXT: br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK3: .omp.reduction.then: -// CHECK3-NEXT: [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR9]] +// CHECK3-NEXT: [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR11]] // CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]]) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK3: .omp.reduction.done: // CHECK3-NEXT: [[TMP62:%.*]] = bitcast i32* [[I7]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP63:%.*]] = bitcast %"class.std::complex.0"* [[PARTIAL_SUM5]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP63]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP63]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP64:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP65:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP66:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP67:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR5]] // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: [[TMP68:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP69:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP70:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR5]] // CHECK3-NEXT: [[TMP71:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR1]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR5]] // CHECK3-NEXT: ret void // // @@ -3455,13 +3455,13 @@ // CHECK3-NEXT: store %"class.std::complex.0"* [[__C]], %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK3-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR9]] +// CHECK3-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR11]] // CHECK3-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP1:%.*]] = load double, double* [[__RE_]], align 8, !tbaa [[TBAA24:![0-9]+]] // CHECK3-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[CALL]] // CHECK3-NEXT: store double [[ADD]], double* [[__RE_]], align 8, !tbaa [[TBAA24]] // CHECK3-NEXT: [[TMP2:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK3-NEXT: [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR9]] +// CHECK3-NEXT: [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR11]] // CHECK3-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 1 // CHECK3-NEXT: [[TMP3:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA26:![0-9]+]] // CHECK3-NEXT: [[ADD3:%.*]] = fadd double [[TMP3]], [[CALL2]] @@ -3470,7 +3470,7 @@ // // // CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func5 -// CHECK3-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR5]] { +// CHECK3-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR6]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 @@ -3534,7 +3534,7 @@ // CHECK3: then: // CHECK3-NEXT: [[TMP43:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* // CHECK3-NEXT: [[TMP44:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func4"(i8* [[TMP43]], i8* [[TMP44]]) #[[ATTR1]] +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func4"(i8* [[TMP43]], i8* [[TMP44]]) #[[ATTR5]] // CHECK3-NEXT: br label [[IFCONT:%.*]] // CHECK3: else: // CHECK3-NEXT: br label [[IFCONT]] @@ -3561,7 +3561,7 @@ // // // CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func6 -// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5]] { +// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR6]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -3622,7 +3622,7 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper -// CHECK3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5]] { +// CHECK3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR6]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -3642,7 +3642,7 @@ // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 2 // CHECK3-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to %"class.std::complex.0"** // CHECK3-NEXT: [[TMP11:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[TMP10]], align 8, !tbaa [[TBAA12]] -// CHECK3-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]], %"class.std::complex.0"* [[TMP11]]) #[[ATTR1]] +// CHECK3-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]], %"class.std::complex.0"* [[TMP11]]) #[[ATTR5]] // CHECK3-NEXT: ret void // // diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp @@ -615,7 +615,7 @@ // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] @@ -656,7 +656,7 @@ // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[I]], i64 4) // CHECK1-NEXT: ret void // @@ -737,7 +737,7 @@ // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] @@ -778,7 +778,7 @@ // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) // CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[I]], i32 4) // CHECK2-NEXT: ret void // @@ -859,7 +859,7 @@ // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] @@ -900,7 +900,7 @@ // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) // CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[I]], i32 4) // CHECK3-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp @@ -18555,7 +18555,7 @@ // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) // CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 // CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] @@ -18645,7 +18645,7 @@ // CHECK1: omp.loop.exit: // CHECK1-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP47]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP47]]) // CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 // CHECK1-NEXT: br i1 [[TMP49]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] @@ -18780,7 +18780,7 @@ // CHECK1: omp.dispatch.end: // CHECK1-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP27]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP27]]) // CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK1-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] @@ -18869,7 +18869,7 @@ // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] @@ -18952,7 +18952,7 @@ // CHECK1: omp.loop.exit: // CHECK1-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP43]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP43]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -19047,7 +19047,7 @@ // CHECK1: omp.loop.exit: // CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -19100,7 +19100,7 @@ // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] @@ -19166,7 +19166,7 @@ // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // @@ -19236,7 +19236,7 @@ // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -19302,7 +19302,7 @@ // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] @@ -19375,7 +19375,7 @@ // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // @@ -19469,7 +19469,7 @@ // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -19566,7 +19566,7 @@ // CHECK1-NEXT: [[CONV12:%.*]] = zext i32 [[NVPTX_NUM_THREADS]] to i64 // CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 [[CONV12]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_8(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 [[CONV12]]) // CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 // CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK1-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP10]], [[TMP11]] @@ -19647,7 +19647,7 @@ // CHECK1: omp.loop.exit: // CHECK1-NEXT: [[TMP43:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP44]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP44]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -19784,7 +19784,7 @@ // CHECK1: omp.loop.exit: // CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP28]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP28]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -19870,7 +19870,7 @@ // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] @@ -19957,7 +19957,7 @@ // CHECK1: omp.loop.exit: // CHECK1-NEXT: [[TMP45:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP46]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP46]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -20055,7 +20055,7 @@ // CHECK1: omp.loop.exit: // CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -20149,7 +20149,7 @@ // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) // CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 // CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] @@ -20239,7 +20239,7 @@ // CHECK2: omp.loop.exit: // CHECK2-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP47]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP47]]) // CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 // CHECK2-NEXT: br i1 [[TMP49]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] @@ -20374,7 +20374,7 @@ // CHECK2: omp.dispatch.end: // CHECK2-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP27]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP27]]) // CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK2-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] @@ -20463,7 +20463,7 @@ // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] @@ -20546,7 +20546,7 @@ // CHECK2: omp.loop.exit: // CHECK2-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP43]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP43]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void @@ -20641,7 +20641,7 @@ // CHECK2: omp.loop.exit: // CHECK2-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void @@ -20694,7 +20694,7 @@ // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] @@ -20760,7 +20760,7 @@ // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK2-NEXT: ret void // // @@ -20830,7 +20830,7 @@ // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) // CHECK2-NEXT: ret void // // @@ -20896,7 +20896,7 @@ // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] @@ -20969,7 +20969,7 @@ // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK2-NEXT: ret void // // @@ -21063,7 +21063,7 @@ // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) // CHECK2-NEXT: ret void // // @@ -21157,7 +21157,7 @@ // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK2-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 // CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] @@ -21240,7 +21240,7 @@ // CHECK2: omp.loop.exit: // CHECK2-NEXT: [[TMP45:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP46]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP46]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void @@ -21373,7 +21373,7 @@ // CHECK2: omp.loop.exit: // CHECK2-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP28]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP28]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void @@ -21459,7 +21459,7 @@ // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] @@ -21546,7 +21546,7 @@ // CHECK2: omp.loop.exit: // CHECK2-NEXT: [[TMP45:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP46]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP46]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void @@ -21644,7 +21644,7 @@ // CHECK2: omp.loop.exit: // CHECK2-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void @@ -21732,7 +21732,7 @@ // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] @@ -21818,7 +21818,7 @@ // CHECK3: omp.loop.exit: // CHECK3-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP45]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP45]]) // CHECK3-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 // CHECK3-NEXT: br i1 [[TMP47]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] @@ -21946,7 +21946,7 @@ // CHECK3: omp.dispatch.end: // CHECK3-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP27]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP27]]) // CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK3-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] @@ -22032,7 +22032,7 @@ // CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] @@ -22112,7 +22112,7 @@ // CHECK3: omp.loop.exit: // CHECK3-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP41]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP41]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -22202,7 +22202,7 @@ // CHECK3: omp.loop.exit: // CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -22255,7 +22255,7 @@ // CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] @@ -22319,7 +22319,7 @@ // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK3-NEXT: ret void // // @@ -22385,7 +22385,7 @@ // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -22448,7 +22448,7 @@ // CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] @@ -22518,7 +22518,7 @@ // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK3-NEXT: ret void // // @@ -22606,7 +22606,7 @@ // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -22700,7 +22700,7 @@ // CHECK3-NEXT: [[CONV11:%.*]] = zext i32 [[NVPTX_NUM_THREADS]] to i64 // CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 [[CONV11]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_8(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 [[CONV11]]) // CHECK3-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 // CHECK3-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK3-NEXT: [[CMP12:%.*]] = icmp sgt i64 [[TMP10]], [[TMP11]] @@ -22782,7 +22782,7 @@ // CHECK3: omp.loop.exit: // CHECK3-NEXT: [[TMP45:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP46]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP46]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -22919,7 +22919,7 @@ // CHECK3: omp.loop.exit: // CHECK3-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP28]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP28]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -23002,7 +23002,7 @@ // CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] @@ -23086,7 +23086,7 @@ // CHECK3: omp.loop.exit: // CHECK3-NEXT: [[TMP43:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP44]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP44]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -23178,7 +23178,7 @@ // CHECK3: omp.loop.exit: // CHECK3-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -23266,7 +23266,7 @@ // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK4-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) // CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK4-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] @@ -23352,7 +23352,7 @@ // CHECK4: omp.loop.exit: // CHECK4-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP45]]) +// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP45]]) // CHECK4-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK4-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 // CHECK4-NEXT: br i1 [[TMP47]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] @@ -23480,7 +23480,7 @@ // CHECK4: omp.dispatch.end: // CHECK4-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP27]]) +// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP27]]) // CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK4-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK4-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] @@ -23566,7 +23566,7 @@ // CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK4-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] @@ -23646,7 +23646,7 @@ // CHECK4: omp.loop.exit: // CHECK4-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP41]]) +// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP41]]) // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: omp.precond.end: // CHECK4-NEXT: ret void @@ -23736,7 +23736,7 @@ // CHECK4: omp.loop.exit: // CHECK4-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]]) +// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]]) // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: omp.precond.end: // CHECK4-NEXT: ret void @@ -23789,7 +23789,7 @@ // CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK4-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] @@ -23853,7 +23853,7 @@ // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK4-NEXT: ret void // // @@ -23919,7 +23919,7 @@ // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) +// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) // CHECK4-NEXT: ret void // // @@ -23982,7 +23982,7 @@ // CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK4-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] @@ -24052,7 +24052,7 @@ // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK4-NEXT: ret void // // @@ -24140,7 +24140,7 @@ // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) +// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) // CHECK4-NEXT: ret void // // @@ -24234,7 +24234,7 @@ // CHECK4-NEXT: [[CONV11:%.*]] = zext i32 [[NVPTX_NUM_THREADS]] to i64 // CHECK4-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 [[CONV11]]) +// CHECK4-NEXT: call void @__kmpc_distribute_static_init_8(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 [[CONV11]]) // CHECK4-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 // CHECK4-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK4-NEXT: [[CMP12:%.*]] = icmp sgt i64 [[TMP10]], [[TMP11]] @@ -24316,7 +24316,7 @@ // CHECK4: omp.loop.exit: // CHECK4-NEXT: [[TMP45:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP46]]) +// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP46]]) // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: omp.precond.end: // CHECK4-NEXT: ret void @@ -24453,7 +24453,7 @@ // CHECK4: omp.loop.exit: // CHECK4-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP28]]) +// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP28]]) // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: omp.precond.end: // CHECK4-NEXT: ret void @@ -24536,7 +24536,7 @@ // CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK4-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] @@ -24620,7 +24620,7 @@ // CHECK4: omp.loop.exit: // CHECK4-NEXT: [[TMP43:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP44]]) +// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP44]]) // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: omp.precond.end: // CHECK4-NEXT: ret void @@ -24712,7 +24712,7 @@ // CHECK4: omp.loop.exit: // CHECK4-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]]) +// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]]) // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: omp.precond.end: // CHECK4-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp @@ -116,7 +116,7 @@ // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 // CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] @@ -206,7 +206,7 @@ // CHECK1: omp.loop.exit: // CHECK1-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP47]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP47]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -333,7 +333,7 @@ // CHECK1: omp.dispatch.end: // CHECK1-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -420,7 +420,7 @@ // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] @@ -506,7 +506,7 @@ // CHECK2: omp.loop.exit: // CHECK2-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP45]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP45]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void @@ -627,7 +627,7 @@ // CHECK2: omp.dispatch.end: // CHECK2-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void @@ -714,7 +714,7 @@ // CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] @@ -800,7 +800,7 @@ // CHECK3: omp.loop.exit: // CHECK3-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP45]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP45]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -921,7 +921,7 @@ // CHECK3: omp.dispatch.end: // CHECK3-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -1014,7 +1014,7 @@ // CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK4-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 // CHECK4-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] @@ -1104,7 +1104,7 @@ // CHECK4: omp.loop.exit: // CHECK4-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP47]]) +// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP47]]) // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: omp.precond.end: // CHECK4-NEXT: ret void @@ -1231,7 +1231,7 @@ // CHECK4: omp.dispatch.end: // CHECK4-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]]) +// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]]) // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: omp.precond.end: // CHECK4-NEXT: ret void @@ -1318,7 +1318,7 @@ // CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK5-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK5-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK5-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] @@ -1404,7 +1404,7 @@ // CHECK5: omp.loop.exit: // CHECK5-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP45]]) +// CHECK5-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP45]]) // CHECK5-NEXT: br label [[OMP_PRECOND_END]] // CHECK5: omp.precond.end: // CHECK5-NEXT: ret void @@ -1525,7 +1525,7 @@ // CHECK5: omp.dispatch.end: // CHECK5-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]]) +// CHECK5-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]]) // CHECK5-NEXT: br label [[OMP_PRECOND_END]] // CHECK5: omp.precond.end: // CHECK5-NEXT: ret void @@ -1612,7 +1612,7 @@ // CHECK6-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK6-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK6-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK6-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] @@ -1698,7 +1698,7 @@ // CHECK6: omp.loop.exit: // CHECK6-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP45]]) +// CHECK6-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP45]]) // CHECK6-NEXT: br label [[OMP_PRECOND_END]] // CHECK6: omp.precond.end: // CHECK6-NEXT: ret void @@ -1819,7 +1819,7 @@ // CHECK6: omp.dispatch.end: // CHECK6-NEXT: [[TMP25:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]]) +// CHECK6-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP26]]) // CHECK6-NEXT: br label [[OMP_PRECOND_END]] // CHECK6: omp.precond.end: // CHECK6-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp @@ -9468,7 +9468,7 @@ // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) // CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 // CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] @@ -9558,7 +9558,7 @@ // CHECK1: omp.loop.exit: // CHECK1-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP47]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP47]]) // CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 // CHECK1-NEXT: br i1 [[TMP49]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] @@ -9705,7 +9705,7 @@ // CHECK1: omp.dispatch.end: // CHECK1-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP27]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP27]]) // CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK1-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] @@ -9806,7 +9806,7 @@ // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] @@ -9889,7 +9889,7 @@ // CHECK1: omp.loop.exit: // CHECK1-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP43]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP43]]) // CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 // CHECK1-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] @@ -9996,7 +9996,7 @@ // CHECK1: omp.loop.exit: // CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]]) // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 // CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] @@ -10061,7 +10061,7 @@ // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] @@ -10127,7 +10127,7 @@ // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 // CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] @@ -10204,7 +10204,7 @@ // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] @@ -10277,7 +10277,7 @@ // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] @@ -10350,7 +10350,7 @@ // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 // CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] @@ -10452,7 +10452,7 @@ // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 // CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] @@ -10546,7 +10546,7 @@ // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) // CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] @@ -10632,7 +10632,7 @@ // CHECK2: omp.loop.exit: // CHECK2-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP45]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP45]]) // CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 // CHECK2-NEXT: br i1 [[TMP47]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] @@ -10772,7 +10772,7 @@ // CHECK2: omp.dispatch.end: // CHECK2-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP27]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP27]]) // CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK2-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] @@ -10870,7 +10870,7 @@ // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] @@ -10950,7 +10950,7 @@ // CHECK2: omp.loop.exit: // CHECK2-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP41]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP41]]) // CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 // CHECK2-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] @@ -11052,7 +11052,7 @@ // CHECK2: omp.loop.exit: // CHECK2-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]]) // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 // CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] @@ -11117,7 +11117,7 @@ // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] @@ -11181,7 +11181,7 @@ // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK2-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] @@ -11254,7 +11254,7 @@ // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK2-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] @@ -11324,7 +11324,7 @@ // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] @@ -11394,7 +11394,7 @@ // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 // CHECK2-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] @@ -11490,7 +11490,7 @@ // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 // CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] @@ -11584,7 +11584,7 @@ // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] @@ -11670,7 +11670,7 @@ // CHECK3: omp.loop.exit: // CHECK3-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP45]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP45]]) // CHECK3-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 // CHECK3-NEXT: br i1 [[TMP47]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] @@ -11810,7 +11810,7 @@ // CHECK3: omp.dispatch.end: // CHECK3-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP27]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP27]]) // CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK3-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] @@ -11908,7 +11908,7 @@ // CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] @@ -11988,7 +11988,7 @@ // CHECK3: omp.loop.exit: // CHECK3-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP41]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP41]]) // CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 // CHECK3-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] @@ -12090,7 +12090,7 @@ // CHECK3: omp.loop.exit: // CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP18]]) // CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 // CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] @@ -12155,7 +12155,7 @@ // CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] @@ -12219,7 +12219,7 @@ // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] @@ -12292,7 +12292,7 @@ // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) // CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] @@ -12362,7 +12362,7 @@ // CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] @@ -12432,7 +12432,7 @@ // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 // CHECK3-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] @@ -12528,7 +12528,7 @@ // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) // CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 // CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp @@ -73,24 +73,24 @@ // CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false) // CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 false) -// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, -// CHECK: call void @__kmpc_for_static_fini( +// CHECK: call void @__kmpc_distribute_static_init_4({{.+}}, {{.+}}, {{.+}} 91, +// CHECK: call void @__kmpc_distribute_static_fini( // CHECK: ret void // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l43( // CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false) // CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 false) -// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, -// CHECK: call void @__kmpc_for_static_fini( +// CHECK: call void @__kmpc_distribute_static_init_4({{.+}}, {{.+}}, {{.+}} 91, +// CHECK: call void @__kmpc_distribute_static_fini( // CHECK: ret void // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}_l48( // CHECK: call i32 @__kmpc_target_init({{.*}}, i8 2, i1 false, i1 false) // CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 false) -// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, -// CHECK: call void @__kmpc_for_static_fini( +// CHECK: call void @__kmpc_distribute_static_init_4({{.+}}, {{.+}}, {{.+}} 91, +// CHECK: call void @__kmpc_distribute_static_fini( // CHECK: ret void // CHECK: define {{.*}}void {{@__omp_offloading_.+}}_l53({{.+}}, i{{32|64}} [[F_IN:%.+]]) @@ -99,8 +99,8 @@ // CHECK: call void @__kmpc_target_deinit({{.*}}, i8 2, i1 false) // CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align -// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]], -// CHECK: call void @__kmpc_for_static_fini( +// CHECK: call void @__kmpc_distribute_static_init_4({{.+}}, {{.+}}, {{.+}} 91, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]], +// CHECK: call void @__kmpc_distribute_static_fini( // CHECK: ret void #endif diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -271,6 +271,15 @@ __OMP_RTL(__kmpc_for_static_init_8u, false, Void, IdentPtr, Int32, Int32, Int32Ptr, Int64Ptr, Int64Ptr, Int64Ptr, Int64, Int64) __OMP_RTL(__kmpc_for_static_fini, false, Void, IdentPtr, Int32) +__OMP_RTL(__kmpc_distribute_static_init_4, false, Void, IdentPtr, Int32, Int32, + Int32Ptr, Int32Ptr, Int32Ptr, Int32Ptr, Int32, Int32) +__OMP_RTL(__kmpc_distribute_static_init_4u, false, Void, IdentPtr, Int32, Int32, + Int32Ptr, Int32Ptr, Int32Ptr, Int32Ptr, Int32, Int32) +__OMP_RTL(__kmpc_distribute_static_init_8, false, Void, IdentPtr, Int32, Int32, + Int32Ptr, Int64Ptr, Int64Ptr, Int64Ptr, Int64, Int64) +__OMP_RTL(__kmpc_distribute_static_init_8u, false, Void, IdentPtr, Int32, Int32, + Int32Ptr, Int64Ptr, Int64Ptr, Int64Ptr, Int64, Int64) +__OMP_RTL(__kmpc_distribute_static_fini, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_dist_dispatch_init_4, false, Void, IdentPtr, Int32, Int32, Int32Ptr, Int32, Int32, Int32, Int32) __OMP_RTL(__kmpc_dist_dispatch_init_4u, false, Void, IdentPtr, Int32, Int32, @@ -712,6 +721,28 @@ AttributeSet(), AttributeSet())) __OMP_RTL_ATTRS(__kmpc_for_static_fini, InaccessibleArgOnlyAttrs, AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_distribute_static_init_4, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_distribute_static_init_4u, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_distribute_static_init_8, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_distribute_static_init_8u, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_distribute_static_fini, InaccessibleArgOnlyAttrs, + AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) __OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_4, GetterArgWriteAttrs, AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(),