diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -1205,14 +1205,18 @@ /// \param Allowed If not null, a set limiting the attribute opportunities. /// \param DeleteFns Whether to delete functions. /// \param RewriteSignatures Whether to rewrite function signatures. + /// \param DefaultInitializeLiveInternals Whether to initialize default AAs + /// for live internal functions. Attributor(SetVector &Functions, InformationCache &InfoCache, CallGraphUpdater &CGUpdater, DenseSet *Allowed = nullptr, bool DeleteFns = true, - bool RewriteSignatures = true) + bool RewriteSignatures = true, + bool DefaultInitializeLiveInternals = true) : Allocator(InfoCache.Allocator), Functions(Functions), InfoCache(InfoCache), CGUpdater(CGUpdater), Allowed(Allowed), DeleteFns(DeleteFns), RewriteSignatures(RewriteSignatures), - MaxFixpointIterations(None), OREGetter(None), PassName("") {} + MaxFixpointIterations(None), OREGetter(None), PassName(""), + DefaultInitializeLiveInternals(DefaultInitializeLiveInternals) {} /// Constructor /// @@ -1238,7 +1242,7 @@ DeleteFns(DeleteFns), RewriteSignatures(RewriteSignatures), MaxFixpointIterations(MaxFixpointIterations), OREGetter(Optional(OREGetter)), - PassName(PassName) {} + PassName(PassName), DefaultInitializeLiveInternals(false) {} ~Attributor(); @@ -1500,7 +1504,8 @@ assert(F.hasLocalLinkage() && "Only local linkage is assumed dead initially."); - identifyDefaultAbstractAttributes(const_cast(F)); + if (DefaultInitializeLiveInternals) + identifyDefaultAbstractAttributes(const_cast(F)); } /// Helper function to remove callsite. @@ -2132,6 +2137,13 @@ /// The name of the pass to emit remarks for. const char *PassName = ""; + /// Flag to determine if we want to initialize all default AAs for an internal + /// function marked live. + /// TODO: This should probably be a callback, or maybe + /// identifyDefaultAbstractAttributes should be virtual, something to allow + /// customizable lazy initialization for internal functions. + const bool DefaultInitializeLiveInternals; + friend AADepGraph; friend AttributorCallGraph; }; diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines.ll b/llvm/test/Transforms/OpenMP/custom_state_machines.ll --- a/llvm/test/Transforms/OpenMP/custom_state_machines.ll +++ b/llvm/test/Transforms/OpenMP/custom_state_machines.ll @@ -852,7 +852,7 @@ ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] -; AMDGPU-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: @@ -861,7 +861,7 @@ ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__ -; AMDGPU-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -870,26 +870,26 @@ ; AMDGPU-NEXT: ret void ; ; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU: Function Attrs: convergent noinline nounwind ; AMDGPU-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized -; AMDGPU-SAME: () #[[ATTR0]] { +; AMDGPU-SAME: () #[[ATTR1:[0-9]+]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2:[0-9]+]]) #[[ATTR3]] -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) #[[ATTR3]] +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; AMDGPU-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; AMDGPU-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] ; AMDGPU: omp_if.then: ; AMDGPU-NEXT: store i32 0, i32* @G, align 4 -; AMDGPU-NEXT: call void @__kmpc_end_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; AMDGPU-NEXT: br label [[OMP_IF_END]] ; AMDGPU: omp_if.end: -; AMDGPU-NEXT: call void @__kmpc_barrier(%struct.ident_t* noundef @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; ; AMDGPU: Function Attrs: convergent noinline nounwind ; AMDGPU-LABEL: define {{[^@]+}}@no_parallel_region_in_here -; AMDGPU-SAME: () #[[ATTR1:[0-9]+]] { +; AMDGPU-SAME: () #[[ATTR1]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) @@ -957,7 +957,7 @@ ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU-NEXT: call void @__omp_outlined__1(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: @@ -966,7 +966,7 @@ ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1 -; AMDGPU-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -977,16 +977,16 @@ ; AMDGPU-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 ; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef @__omp_outlined__2_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* @__omp_outlined__2_wrapper.ID, i8** [[TMP2]], i64 0) ; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] ; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef @__omp_outlined__3_wrapper.ID, i8** noundef [[TMP3]], i64 noundef 0) +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* @__omp_outlined__3_wrapper.ID, i8** [[TMP3]], i64 0) ; AMDGPU-NEXT: ret void ; ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2 -; AMDGPU-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -1003,13 +1003,13 @@ ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__2(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3 -; AMDGPU-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -1026,7 +1026,7 @@ ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__3(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; @@ -1089,7 +1089,7 @@ ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: @@ -1098,7 +1098,7 @@ ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__4 -; AMDGPU-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -1110,19 +1110,19 @@ ; AMDGPU-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 ; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef @__omp_outlined__5_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* @__omp_outlined__5_wrapper.ID, i8** [[TMP2]], i64 0) ; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]] ; AMDGPU-NEXT: ret void ; ; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU: Function Attrs: convergent noinline nounwind ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized -; AMDGPU-SAME: () #[[ATTR0]] { +; AMDGPU-SAME: () #[[ATTR1]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]] ; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0) ; AMDGPU-NEXT: ret void ; ; @@ -1139,7 +1139,7 @@ ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5 -; AMDGPU-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -1156,18 +1156,18 @@ ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__5(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; -; AMDGPU: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU: Function Attrs: convergent noinline nounwind ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized -; AMDGPU-SAME: () #[[ATTR0]] { +; AMDGPU-SAME: () #[[ATTR1]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]] ; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0) ; AMDGPU-NEXT: ret void ; ; @@ -1237,7 +1237,7 @@ ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: @@ -1246,7 +1246,7 @@ ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__6 -; AMDGPU-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -1256,16 +1256,16 @@ ; AMDGPU-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 ; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef @__omp_outlined__7_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* @__omp_outlined__7_wrapper.ID, i8** [[TMP2]], i64 0) ; AMDGPU-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]] ; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef @__omp_outlined__8_wrapper.ID, i8** noundef [[TMP3]], i64 noundef 0) +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* @__omp_outlined__8_wrapper.ID, i8** [[TMP3]], i64 0) ; AMDGPU-NEXT: ret void ; ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7 -; AMDGPU-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -1282,13 +1282,13 @@ ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__7(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8 -; AMDGPU-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -1305,7 +1305,7 @@ ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__8(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; @@ -1362,7 +1362,7 @@ ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU-NEXT: call void @__omp_outlined__9(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: @@ -1371,7 +1371,7 @@ ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9 -; AMDGPU-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -1381,16 +1381,16 @@ ; AMDGPU-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 ; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef @__omp_outlined__10_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* @__omp_outlined__10_wrapper.ID, i8** [[TMP2]], i64 0) ; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR9]] ; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef @__omp_outlined__11_wrapper.ID, i8** noundef [[TMP3]], i64 noundef 0) +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* @__omp_outlined__11_wrapper.ID, i8** [[TMP3]], i64 0) ; AMDGPU-NEXT: ret void ; ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__10 -; AMDGPU-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -1407,13 +1407,13 @@ ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__10(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__11 -; AMDGPU-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -1430,7 +1430,7 @@ ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__11(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; @@ -1487,7 +1487,7 @@ ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU-NEXT: call void @__omp_outlined__12(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__omp_outlined__12(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: @@ -1496,7 +1496,7 @@ ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__12 -; AMDGPU-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -1507,15 +1507,15 @@ ; AMDGPU-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 ; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef @__omp_outlined__13_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* @__omp_outlined__13_wrapper.ID, i8** [[TMP2]], i64 0) ; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* noundef @__omp_outlined__14_wrapper.ID, i8** noundef [[TMP3]], i64 noundef 0) +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* @__omp_outlined__14_wrapper.ID, i8** [[TMP3]], i64 0) ; AMDGPU-NEXT: ret void ; ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__13 -; AMDGPU-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -1532,13 +1532,13 @@ ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__13(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__14 -; AMDGPU-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -1555,7 +1555,7 @@ ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__14(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; @@ -1607,7 +1607,7 @@ ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__omp_outlined__15(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__omp_outlined__15(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: @@ -1616,7 +1616,7 @@ ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__15 -; AMDGPU-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -1708,7 +1708,7 @@ ; AMDGPU-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU: user_code.entry: ; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; AMDGPU-NEXT: call void @__omp_outlined__16(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__omp_outlined__16(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-NEXT: ret void ; AMDGPU: worker.exit: @@ -1717,7 +1717,7 @@ ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__16 -; AMDGPU-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -1734,7 +1734,7 @@ ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__17 -; AMDGPU-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -1751,13 +1751,13 @@ ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__17(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__18 -; AMDGPU-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -1774,7 +1774,7 @@ ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__18(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; @@ -1783,9 +1783,9 @@ ; AMDGPU-SAME: () #[[ATTR1]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]] ; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0) ; AMDGPU-NEXT: ret void ; ; @@ -1802,7 +1802,7 @@ ; ; AMDGPU: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__19 -; AMDGPU-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -1819,7 +1819,7 @@ ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__19(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; @@ -1834,7 +1834,7 @@ ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] -; NVPTX-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: @@ -1843,7 +1843,7 @@ ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__ -; NVPTX-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -1852,26 +1852,26 @@ ; NVPTX-NEXT: ret void ; ; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX: Function Attrs: convergent noinline nounwind ; NVPTX-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized -; NVPTX-SAME: () #[[ATTR0]] { +; NVPTX-SAME: () #[[ATTR1:[0-9]+]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2:[0-9]+]]) #[[ATTR3]] -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) #[[ATTR3]] +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; NVPTX-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; NVPTX-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] ; NVPTX: omp_if.then: ; NVPTX-NEXT: store i32 0, i32* @G, align 4 -; NVPTX-NEXT: call void @__kmpc_end_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; NVPTX-NEXT: br label [[OMP_IF_END]] ; NVPTX: omp_if.end: -; NVPTX-NEXT: call void @__kmpc_barrier(%struct.ident_t* noundef @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; ; NVPTX: Function Attrs: convergent noinline nounwind ; NVPTX-LABEL: define {{[^@]+}}@no_parallel_region_in_here -; NVPTX-SAME: () #[[ATTR1:[0-9]+]] { +; NVPTX-SAME: () #[[ATTR1]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) @@ -1938,7 +1938,7 @@ ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; NVPTX-NEXT: call void @__omp_outlined__1(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: @@ -1947,7 +1947,7 @@ ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1 -; NVPTX-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -1958,16 +1958,16 @@ ; NVPTX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 ; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef @__omp_outlined__2_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* @__omp_outlined__2_wrapper.ID, i8** [[TMP2]], i64 0) ; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] ; NVPTX-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef @__omp_outlined__3_wrapper.ID, i8** noundef [[TMP3]], i64 noundef 0) +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* @__omp_outlined__3_wrapper.ID, i8** [[TMP3]], i64 0) ; NVPTX-NEXT: ret void ; ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2 -; NVPTX-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -1984,13 +1984,13 @@ ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__2(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3 -; NVPTX-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -2007,7 +2007,7 @@ ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__3(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; @@ -2069,7 +2069,7 @@ ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; NVPTX-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: @@ -2078,7 +2078,7 @@ ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__4 -; NVPTX-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -2090,19 +2090,19 @@ ; NVPTX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 ; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef @__omp_outlined__5_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* @__omp_outlined__5_wrapper.ID, i8** [[TMP2]], i64 0) ; NVPTX-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]] ; NVPTX-NEXT: ret void ; ; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX: Function Attrs: convergent noinline nounwind ; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized -; NVPTX-SAME: () #[[ATTR0]] { +; NVPTX-SAME: () #[[ATTR1]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]] ; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0) ; NVPTX-NEXT: ret void ; ; @@ -2119,7 +2119,7 @@ ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5 -; NVPTX-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -2136,18 +2136,18 @@ ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__5(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX: Function Attrs: convergent noinline nounwind ; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized -; NVPTX-SAME: () #[[ATTR0]] { +; NVPTX-SAME: () #[[ATTR1]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]] ; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0) ; NVPTX-NEXT: ret void ; ; @@ -2216,7 +2216,7 @@ ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; NVPTX-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: @@ -2225,7 +2225,7 @@ ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__6 -; NVPTX-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -2235,16 +2235,16 @@ ; NVPTX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 ; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef @__omp_outlined__7_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* @__omp_outlined__7_wrapper.ID, i8** [[TMP2]], i64 0) ; NVPTX-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]] ; NVPTX-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef @__omp_outlined__8_wrapper.ID, i8** noundef [[TMP3]], i64 noundef 0) +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* @__omp_outlined__8_wrapper.ID, i8** [[TMP3]], i64 0) ; NVPTX-NEXT: ret void ; ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7 -; NVPTX-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -2261,13 +2261,13 @@ ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__7(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__8 -; NVPTX-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -2284,7 +2284,7 @@ ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__8(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; @@ -2340,7 +2340,7 @@ ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; NVPTX-NEXT: call void @__omp_outlined__9(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: @@ -2349,7 +2349,7 @@ ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9 -; NVPTX-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -2359,16 +2359,16 @@ ; NVPTX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 ; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef @__omp_outlined__10_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* @__omp_outlined__10_wrapper.ID, i8** [[TMP2]], i64 0) ; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR9]] ; NVPTX-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef @__omp_outlined__11_wrapper.ID, i8** noundef [[TMP3]], i64 noundef 0) +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* @__omp_outlined__11_wrapper.ID, i8** [[TMP3]], i64 0) ; NVPTX-NEXT: ret void ; ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__10 -; NVPTX-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -2385,13 +2385,13 @@ ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__10(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__11 -; NVPTX-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -2408,7 +2408,7 @@ ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__11(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; @@ -2464,7 +2464,7 @@ ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; NVPTX-NEXT: call void @__omp_outlined__12(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__omp_outlined__12(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: @@ -2473,7 +2473,7 @@ ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__12 -; NVPTX-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -2484,15 +2484,15 @@ ; NVPTX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 ; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef @__omp_outlined__13_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* @__omp_outlined__13_wrapper.ID, i8** [[TMP2]], i64 0) ; NVPTX-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* noundef @__omp_outlined__14_wrapper.ID, i8** noundef [[TMP3]], i64 noundef 0) +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* @__omp_outlined__14_wrapper.ID, i8** [[TMP3]], i64 0) ; NVPTX-NEXT: ret void ; ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__13 -; NVPTX-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -2509,13 +2509,13 @@ ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__13(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__14 -; NVPTX-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -2532,7 +2532,7 @@ ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__14(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; @@ -2583,7 +2583,7 @@ ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__omp_outlined__15(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__omp_outlined__15(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: @@ -2592,7 +2592,7 @@ ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__15 -; NVPTX-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -2683,7 +2683,7 @@ ; NVPTX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX: user_code.entry: ; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; NVPTX-NEXT: call void @__omp_outlined__16(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__omp_outlined__16(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-NEXT: ret void ; NVPTX: worker.exit: @@ -2692,7 +2692,7 @@ ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__16 -; NVPTX-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -2709,7 +2709,7 @@ ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__17 -; NVPTX-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -2726,13 +2726,13 @@ ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__17(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__18 -; NVPTX-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -2749,7 +2749,7 @@ ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__18(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; @@ -2758,9 +2758,9 @@ ; NVPTX-SAME: () #[[ATTR1]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]] ; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0) ; NVPTX-NEXT: ret void ; ; @@ -2777,7 +2777,7 @@ ; ; NVPTX: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__19 -; NVPTX-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -2794,7 +2794,7 @@ ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__19(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; @@ -2809,7 +2809,7 @@ ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: @@ -2818,7 +2818,7 @@ ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__ -; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -2827,26 +2827,26 @@ ; AMDGPU-DISABLED-NEXT: ret void ; ; -; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-DISABLED: Function Attrs: convergent noinline nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized -; AMDGPU-DISABLED-SAME: () #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: () #[[ATTR1:[0-9]+]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2:[0-9]+]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; AMDGPU-DISABLED-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] ; AMDGPU-DISABLED: omp_if.then: ; AMDGPU-DISABLED-NEXT: store i32 0, i32* @G, align 4 -; AMDGPU-DISABLED-NEXT: call void @__kmpc_end_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: br label [[OMP_IF_END]] ; AMDGPU-DISABLED: omp_if.end: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier(%struct.ident_t* noundef @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void ; ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here -; AMDGPU-DISABLED-SAME: () #[[ATTR1:[0-9]+]] { +; AMDGPU-DISABLED-SAME: () #[[ATTR1]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) @@ -2873,7 +2873,7 @@ ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: @@ -2882,7 +2882,7 @@ ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1 -; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -2893,16 +2893,16 @@ ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 ; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** [[TMP2]], i64 0) ; AMDGPU-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] ; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** noundef [[TMP3]], i64 noundef 0) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP3]], i64 0) ; AMDGPU-DISABLED-NEXT: ret void ; ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2 -; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -2919,13 +2919,13 @@ ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__2(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void ; ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3 -; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -2942,7 +2942,7 @@ ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__3(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void ; ; @@ -2958,7 +2958,7 @@ ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: @@ -2967,7 +2967,7 @@ ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4 -; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -2979,19 +2979,19 @@ ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 ; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** [[TMP2]], i64 0) ; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]] ; AMDGPU-DISABLED-NEXT: ret void ; ; -; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-DISABLED: Function Attrs: convergent noinline nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized -; AMDGPU-DISABLED-SAME: () #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: () #[[ATTR1]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0) ; AMDGPU-DISABLED-NEXT: ret void ; ; @@ -3008,7 +3008,7 @@ ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5 -; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -3025,18 +3025,18 @@ ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__5(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void ; ; -; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; AMDGPU-DISABLED: Function Attrs: convergent noinline nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized -; AMDGPU-DISABLED-SAME: () #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: () #[[ATTR1]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0) ; AMDGPU-DISABLED-NEXT: ret void ; ; @@ -3063,7 +3063,7 @@ ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: @@ -3072,7 +3072,7 @@ ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6 -; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -3082,16 +3082,16 @@ ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 ; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** [[TMP2]], i64 0) ; AMDGPU-DISABLED-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]] ; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** noundef [[TMP3]], i64 noundef 0) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** [[TMP3]], i64 0) ; AMDGPU-DISABLED-NEXT: ret void ; ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7 -; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -3108,13 +3108,13 @@ ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__7(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void ; ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8 -; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -3131,7 +3131,7 @@ ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__8(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void ; ; @@ -3147,7 +3147,7 @@ ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: @@ -3156,7 +3156,7 @@ ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9 -; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -3166,16 +3166,16 @@ ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 ; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to i8*), i8** [[TMP2]], i64 0) ; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] ; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** noundef [[TMP3]], i64 noundef 0) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** [[TMP3]], i64 0) ; AMDGPU-DISABLED-NEXT: ret void ; ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__10 -; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -3192,13 +3192,13 @@ ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__10(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void ; ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__11 -; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -3215,7 +3215,7 @@ ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__11(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void ; ; @@ -3231,7 +3231,7 @@ ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__12(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__12(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: @@ -3240,7 +3240,7 @@ ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__12 -; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -3251,15 +3251,15 @@ ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 ; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** [[TMP2]], i64 0) ; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__14_wrapper to i8*), i8** noundef [[TMP3]], i64 noundef 0) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__14_wrapper to i8*), i8** [[TMP3]], i64 0) ; AMDGPU-DISABLED-NEXT: ret void ; ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__13 -; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -3276,13 +3276,13 @@ ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__13(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void ; ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__14 -; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -3299,7 +3299,7 @@ ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__14(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void ; ; @@ -3314,7 +3314,7 @@ ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__15(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__15(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: @@ -3323,7 +3323,7 @@ ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__15 -; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -3384,7 +3384,7 @@ ; AMDGPU-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; AMDGPU-DISABLED: user_code.entry: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__16(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__16(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: worker.exit: @@ -3393,7 +3393,7 @@ ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__16 -; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -3410,7 +3410,7 @@ ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__17 -; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -3427,13 +3427,13 @@ ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__17(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void ; ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__18 -; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -3450,7 +3450,7 @@ ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__18(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void ; ; @@ -3459,9 +3459,9 @@ ; AMDGPU-DISABLED-SAME: () #[[ATTR1]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0) ; AMDGPU-DISABLED-NEXT: ret void ; ; @@ -3478,7 +3478,7 @@ ; ; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__19 -; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -3495,7 +3495,7 @@ ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__19(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void ; ; @@ -3510,7 +3510,7 @@ ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: @@ -3519,7 +3519,7 @@ ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__ -; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -3528,26 +3528,26 @@ ; NVPTX-DISABLED-NEXT: ret void ; ; -; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-DISABLED: Function Attrs: convergent noinline nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized -; NVPTX-DISABLED-SAME: () #[[ATTR0]] { +; NVPTX-DISABLED-SAME: () #[[ATTR1:[0-9]+]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2:[0-9]+]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; NVPTX-DISABLED-NEXT: br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] ; NVPTX-DISABLED: omp_if.then: ; NVPTX-DISABLED-NEXT: store i32 0, i32* @G, align 4 -; NVPTX-DISABLED-NEXT: call void @__kmpc_end_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: br label [[OMP_IF_END]] ; NVPTX-DISABLED: omp_if.end: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier(%struct.ident_t* noundef @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void ; ; ; NVPTX-DISABLED: Function Attrs: convergent noinline nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@no_parallel_region_in_here -; NVPTX-DISABLED-SAME: () #[[ATTR1:[0-9]+]] { +; NVPTX-DISABLED-SAME: () #[[ATTR1]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) @@ -3574,7 +3574,7 @@ ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: @@ -3583,7 +3583,7 @@ ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1 -; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -3594,16 +3594,16 @@ ; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 ; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** [[TMP2]], i64 0) ; NVPTX-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] ; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** noundef [[TMP3]], i64 noundef 0) +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP3]], i64 0) ; NVPTX-DISABLED-NEXT: ret void ; ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2 -; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -3620,13 +3620,13 @@ ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__2(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void ; ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3 -; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -3643,7 +3643,7 @@ ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__3(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void ; ; @@ -3659,7 +3659,7 @@ ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: @@ -3668,7 +3668,7 @@ ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4 -; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -3680,19 +3680,19 @@ ; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 ; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** [[TMP2]], i64 0) ; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]] ; NVPTX-DISABLED-NEXT: ret void ; ; -; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-DISABLED: Function Attrs: convergent noinline nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized -; NVPTX-DISABLED-SAME: () #[[ATTR0]] { +; NVPTX-DISABLED-SAME: () #[[ATTR1]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0) ; NVPTX-DISABLED-NEXT: ret void ; ; @@ -3709,7 +3709,7 @@ ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5 -; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -3726,18 +3726,18 @@ ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__5(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void ; ; -; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind +; NVPTX-DISABLED: Function Attrs: convergent noinline nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized -; NVPTX-DISABLED-SAME: () #[[ATTR0]] { +; NVPTX-DISABLED-SAME: () #[[ATTR1]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0) ; NVPTX-DISABLED-NEXT: ret void ; ; @@ -3764,7 +3764,7 @@ ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: @@ -3773,7 +3773,7 @@ ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6 -; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -3783,16 +3783,16 @@ ; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 ; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** [[TMP2]], i64 0) ; NVPTX-DISABLED-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]] ; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** noundef [[TMP3]], i64 noundef 0) +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** [[TMP3]], i64 0) ; NVPTX-DISABLED-NEXT: ret void ; ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7 -; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -3809,13 +3809,13 @@ ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__7(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void ; ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8 -; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -3832,7 +3832,7 @@ ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__8(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void ; ; @@ -3848,7 +3848,7 @@ ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: @@ -3857,7 +3857,7 @@ ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9 -; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -3867,16 +3867,16 @@ ; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 ; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to i8*), i8** [[TMP2]], i64 0) ; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] ; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** noundef [[TMP3]], i64 noundef 0) +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** [[TMP3]], i64 0) ; NVPTX-DISABLED-NEXT: ret void ; ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__10 -; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -3893,13 +3893,13 @@ ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__10(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void ; ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__11 -; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -3916,7 +3916,7 @@ ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__11(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void ; ; @@ -3932,7 +3932,7 @@ ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__12(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__12(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: @@ -3941,7 +3941,7 @@ ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__12 -; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -3952,15 +3952,15 @@ ; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 ; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** [[TMP2]], i64 0) ; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__14_wrapper to i8*), i8** noundef [[TMP3]], i64 noundef 0) +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__14_wrapper to i8*), i8** [[TMP3]], i64 0) ; NVPTX-DISABLED-NEXT: ret void ; ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__13 -; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -3977,13 +3977,13 @@ ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__13(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void ; ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__14 -; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -4000,7 +4000,7 @@ ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__14(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void ; ; @@ -4015,7 +4015,7 @@ ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__15(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__15(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: @@ -4024,7 +4024,7 @@ ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__15 -; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -4085,7 +4085,7 @@ ; NVPTX-DISABLED-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; NVPTX-DISABLED: user_code.entry: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__16(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__16(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: worker.exit: @@ -4094,7 +4094,7 @@ ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__16 -; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -4111,7 +4111,7 @@ ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__17 -; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -4128,13 +4128,13 @@ ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__17(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void ; ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__18 -; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -4151,7 +4151,7 @@ ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__18(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void ; ; @@ -4160,9 +4160,9 @@ ; NVPTX-DISABLED-SAME: () #[[ATTR1]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0) ; NVPTX-DISABLED-NEXT: ret void ; ; @@ -4179,7 +4179,7 @@ ; ; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__19 -; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -4196,6 +4196,6 @@ ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__19(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void ; diff --git a/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll b/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll --- a/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll +++ b/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll @@ -30,9 +30,9 @@ ; CHECK-DAG: icmp eq void (i16, i32)* %worker.work_fn.addr_cast, bitcast (i8* @__omp_outlined__2_wrapper.ID to void (i16, i32)*) -; CHECK-DAG: call void @__kmpc_parallel_51(%struct.ident_t* noundef @1, i32 %{{.*}}, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* noundef @__omp_outlined__1_wrapper.ID, i8** noundef %{{.*}}, i64 noundef 0) -; CHECK-DAG: call void @__kmpc_parallel_51(%struct.ident_t* noundef @1, i32 %{{.*}}, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef @__omp_outlined__2_wrapper.ID, i8** noundef %{{.*}}, i64 noundef 0) -; CHECK-DAG: call void @__kmpc_parallel_51(%struct.ident_t* noundef @2, i32 %{{.*}}, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** noundef %{{.*}}, i64 noundef 0) +; CHECK-DAG: call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %{{.*}}, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* @__omp_outlined__1_wrapper.ID, i8** %{{.*}}, i64 0) +; CHECK-DAG: call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %{{.*}}, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* @__omp_outlined__2_wrapper.ID, i8** %{{.*}}, i64 0) +; CHECK-DAG: call void @__kmpc_parallel_51(%struct.ident_t* @2, i32 %{{.*}}, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** %{{.*}}, i64 0) %struct.ident_t = type { i32, i32, i32, i32, i8* } diff --git a/llvm/test/Transforms/OpenMP/parallel_level_fold.ll b/llvm/test/Transforms/OpenMP/parallel_level_fold.ll --- a/llvm/test/Transforms/OpenMP/parallel_level_fold.ll +++ b/llvm/test/Transforms/OpenMP/parallel_level_fold.ll @@ -49,9 +49,9 @@ define weak void @parallel() { ; CHECK-LABEL: define {{[^@]+}}@parallel() { -; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* align 4294967296 null, i8 2, i1 false, i1 false) +; CHECK-NEXT: [[I:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false) ; CHECK-NEXT: call void @spmd_helper() -; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noalias noundef align 4294967296 null, i32 noundef 0, i32 noundef 0, i32 noundef 0, i32 noundef 0, i8* noalias noundef align 4294967296 null, i8* noalias noundef align 4294967296 null, i8** noalias noundef align 4294967296 null, i64 noundef 0) +; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* null, i32 0, i32 0, i32 0, i32 0, i8* null, i8* null, i8** null, i64 0) ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false) ; CHECK-NEXT: ret void ; @@ -108,7 +108,7 @@ define internal void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) { ; CHECK-LABEL: define {{[^@]+}}@__kmpc_parallel_51 -; CHECK-SAME: (%struct.ident_t* noalias nocapture nofree readnone align 4294967296 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]], i8* noalias nocapture nofree readnone align 4294967296 [[TMP5:%.*]], i8* noalias nocapture nofree readnone align 4294967296 [[TMP6:%.*]], i8** noalias nocapture nofree readnone align 4294967296 [[TMP7:%.*]], i64 [[TMP8:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-SAME: (%struct.ident_t* [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]], i8* [[TMP5:%.*]], i8* [[TMP6:%.*]], i8** [[TMP7:%.*]], i64 [[TMP8:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: call void @parallel_helper() ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll --- a/llvm/test/Transforms/OpenMP/remove_globalization.ll +++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll @@ -30,18 +30,18 @@ ; CHECK-LABEL: define {{[^@]+}}@kernel() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 false, i1 true) -; CHECK-NEXT: call void @foo() #[[ATTR4:[0-9]+]] -; CHECK-NEXT: call void @bar() #[[ATTR4]] -; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR3:[0-9]+]] +; CHECK-NEXT: call void @foo() #[[ATTR0:[0-9]+]] +; CHECK-NEXT: call void @bar() #[[ATTR0]] +; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR5:[0-9]+]] ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1, i1 true) ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@kernel() { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 false, i1 true) -; CHECK-DISABLED-NEXT: call void @foo() #[[ATTR4:[0-9]+]] -; CHECK-DISABLED-NEXT: call void @bar() #[[ATTR4]] -; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR3:[0-9]+]] +; CHECK-DISABLED-NEXT: call void @foo() #[[ATTR0:[0-9]+]] +; CHECK-DISABLED-NEXT: call void @bar() #[[ATTR0]] +; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR5:[0-9]+]] ; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1, i1 true) ; CHECK-DISABLED-NEXT: ret void ; @@ -56,13 +56,13 @@ define internal void @foo() { ; CHECK-LABEL: define {{[^@]+}}@foo -; CHECK-SAME: () #[[ATTR0:[0-9]+]] { +; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1 ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@foo -; CHECK-DISABLED-SAME: () #[[ATTR0:[0-9]+]] { +; CHECK-DISABLED-SAME: () #[[ATTR0]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1 ; CHECK-DISABLED-NEXT: ret void @@ -76,19 +76,19 @@ define internal void @bar() { ; CHECK-LABEL: define {{[^@]+}}@bar -; CHECK-SAME: () #[[ATTR0]] { +; CHECK-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i8* @__kmpc_alloc_shared(i64 noundef 4) #[[ATTR4]], !dbg [[DBG8:![0-9]+]] -; CHECK-NEXT: call void @share(i8* nofree writeonly [[TMP0]]) #[[ATTR5:[0-9]+]], !dbg [[DBG8]] -; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[TMP0]], i64 noundef 4) #[[ATTR4]] +; CHECK-NEXT: [[TMP0:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) #[[ATTR0]], !dbg [[DBG8:![0-9]+]] +; CHECK-NEXT: call void @share(i8* nofree [[TMP0]]) #[[ATTR6:[0-9]+]], !dbg [[DBG8]] +; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[TMP0]], i64 4) #[[ATTR0]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@bar -; CHECK-DISABLED-SAME: () #[[ATTR0]] { +; CHECK-DISABLED-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-DISABLED-NEXT: entry: -; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i8* @__kmpc_alloc_shared(i64 noundef 4) #[[ATTR4]], !dbg [[DBG8:![0-9]+]] -; CHECK-DISABLED-NEXT: call void @share(i8* nofree writeonly [[TMP0]]) #[[ATTR5:[0-9]+]], !dbg [[DBG8]] -; CHECK-DISABLED-NEXT: call void @__kmpc_free_shared(i8* [[TMP0]], i64 noundef 4) #[[ATTR4]] +; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) #[[ATTR0]], !dbg [[DBG8:![0-9]+]] +; CHECK-DISABLED-NEXT: call void @share(i8* nofree [[TMP0]]) #[[ATTR6:[0-9]+]], !dbg [[DBG8]] +; CHECK-DISABLED-NEXT: call void @__kmpc_free_shared(i8* [[TMP0]], i64 4) #[[ATTR0]] ; CHECK-DISABLED-NEXT: ret void ; entry: @@ -100,12 +100,12 @@ define internal void @use(i8* %x) { ; CHECK-LABEL: define {{[^@]+}}@use -; CHECK-SAME: (i8* noalias nocapture nofree readnone [[X:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-SAME: (i8* [[X:%.*]]) #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@use -; CHECK-DISABLED-SAME: (i8* noalias nocapture nofree readnone [[X:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-DISABLED-SAME: (i8* [[X:%.*]]) #[[ATTR2:[0-9]+]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: ret void ; @@ -115,13 +115,13 @@ define internal void @share(i8* %x) { ; CHECK-LABEL: define {{[^@]+}}@share -; CHECK-SAME: (i8* nofree writeonly [[X:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-SAME: (i8* nofree [[X:%.*]]) #[[ATTR3:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: store i8* [[X]], i8** @S, align 8 ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@share -; CHECK-DISABLED-SAME: (i8* nofree writeonly [[X:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-DISABLED-SAME: (i8* nofree [[X:%.*]]) #[[ATTR3:[0-9]+]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: store i8* [[X]], i8** @S, align 8 ; CHECK-DISABLED-NEXT: ret void @@ -135,13 +135,13 @@ ; CHECK-LABEL: define {{[^@]+}}@unused() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1 -; CHECK-NEXT: call void @use(i8* noalias readnone undef) +; CHECK-NEXT: call void @use(i8* undef) ; CHECK-NEXT: ret void ; ; CHECK-DISABLED-LABEL: define {{[^@]+}}@unused() { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i8* @__kmpc_alloc_shared(i64 4), !dbg [[DBG11:![0-9]+]] -; CHECK-DISABLED-NEXT: call void @use(i8* noalias readnone [[TMP0]]) +; CHECK-DISABLED-NEXT: call void @use(i8* [[TMP0]]) ; CHECK-DISABLED-NEXT: call void @__kmpc_free_shared(i8* [[TMP0]], i64 4) ; CHECK-DISABLED-NEXT: ret void ; @@ -180,19 +180,21 @@ !13 = !DILocation(line: 4, column: 2, scope: !9) !14 = !DILocation(line: 6, column: 2, scope: !9) ;. -; CHECK: attributes #[[ATTR0]] = { nosync nounwind } -; CHECK: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; CHECK: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn writeonly } -; CHECK: attributes #[[ATTR3]] = { "llvm.assume"="omp_no_openmp" } -; CHECK: attributes #[[ATTR4]] = { nounwind } -; CHECK: attributes #[[ATTR5]] = { nosync nounwind writeonly } +; CHECK: attributes #[[ATTR0]] = { nounwind } +; CHECK: attributes #[[ATTR1]] = { nosync nounwind } +; CHECK: attributes #[[ATTR2]] = { nounwind readnone } +; CHECK: attributes #[[ATTR3]] = { nofree nosync nounwind writeonly } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind allocsize(0) } +; CHECK: attributes #[[ATTR5]] = { "llvm.assume"="omp_no_openmp" } +; CHECK: attributes #[[ATTR6]] = { nosync nounwind writeonly } ;. -; CHECK-DISABLED: attributes #[[ATTR0]] = { nosync nounwind } -; CHECK-DISABLED: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; CHECK-DISABLED: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn writeonly } -; CHECK-DISABLED: attributes #[[ATTR3]] = { "llvm.assume"="omp_no_openmp" } -; CHECK-DISABLED: attributes #[[ATTR4]] = { nounwind } -; CHECK-DISABLED: attributes #[[ATTR5]] = { nosync nounwind writeonly } +; CHECK-DISABLED: attributes #[[ATTR0]] = { nounwind } +; CHECK-DISABLED: attributes #[[ATTR1]] = { nosync nounwind } +; CHECK-DISABLED: attributes #[[ATTR2]] = { nounwind readnone } +; CHECK-DISABLED: attributes #[[ATTR3]] = { nofree nosync nounwind writeonly } +; CHECK-DISABLED: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind allocsize(0) } +; CHECK-DISABLED: attributes #[[ATTR5]] = { "llvm.assume"="omp_no_openmp" } +; CHECK-DISABLED: attributes #[[ATTR6]] = { nosync nounwind writeonly } ;. ; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 13.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) ; CHECK: [[META1:![0-9]+]] = !DIFile(filename: "remove_globalization.c", directory: "/tmp/remove_globalization.c") diff --git a/llvm/test/Transforms/OpenMP/replace_globalization.ll b/llvm/test/Transforms/OpenMP/replace_globalization.ll --- a/llvm/test/Transforms/OpenMP/replace_globalization.ll +++ b/llvm/test/Transforms/OpenMP/replace_globalization.ll @@ -138,11 +138,11 @@ ; CHECK-LABEL: define {{[^@]+}}@foo() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) -; CHECK-NEXT: [[X:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i64 noundef 4) #[[ATTR6:[0-9]+]] +; CHECK-NEXT: [[X:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i64 4) #[[ATTR6:[0-9]+]] ; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR5:[0-9]+]] ; CHECK-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[X]] to i32* ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[X_ON_STACK]] to i8* -; CHECK-NEXT: call void @use.internalized(i8* nofree align 4 [[TMP0]]) #[[ATTR7:[0-9]+]] +; CHECK-NEXT: call void @use.internalized(i8* nofree [[TMP0]]) #[[ATTR7:[0-9]+]] ; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[X]], i64 4) #[[ATTR8:[0-9]+]] ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; CHECK-NEXT: ret void @@ -156,7 +156,7 @@ ; CHECK: master1: ; CHECK-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([16 x i8], [16 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*) to [4 x i32]* ; CHECK-NEXT: [[A0:%.*]] = bitcast [4 x i32]* [[X_ON_STACK]] to i8* -; CHECK-NEXT: call void @use.internalized(i8* nofree align 4 [[A0]]) #[[ATTR7]] +; CHECK-NEXT: call void @use.internalized(i8* nofree [[A0]]) #[[ATTR7]] ; CHECK-NEXT: br label [[NEXT:%.*]] ; CHECK: next: ; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR5]] @@ -164,7 +164,7 @@ ; CHECK: master2: ; CHECK-NEXT: [[Y_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @y_shared, i32 0, i32 0) to i8*) to [4 x i32]* ; CHECK-NEXT: [[B1:%.*]] = bitcast [4 x i32]* [[Y_ON_STACK]] to i8* -; CHECK-NEXT: call void @use.internalized(i8* nofree align 4 [[B1]]) #[[ATTR7]] +; CHECK-NEXT: call void @use.internalized(i8* nofree [[B1]]) #[[ATTR7]] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -177,10 +177,10 @@ ; CHECK-NEXT: [[C0:%.*]] = icmp eq i32 [[C]], -1 ; CHECK-NEXT: br i1 [[C0]], label [[MASTER3:%.*]], label [[EXIT:%.*]] ; CHECK: master3: -; CHECK-NEXT: [[Z:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i64 noundef 24) #[[ATTR6]], !dbg [[DBG9:![0-9]+]] +; CHECK-NEXT: [[Z:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i64 24) #[[ATTR6]], !dbg [[DBG9:![0-9]+]] ; CHECK-NEXT: [[Z_ON_STACK:%.*]] = bitcast i8* [[Z]] to [6 x i32]* ; CHECK-NEXT: [[C1:%.*]] = bitcast [6 x i32]* [[Z_ON_STACK]] to i8* -; CHECK-NEXT: call void @use.internalized(i8* nofree writeonly align 4 [[C1]]) #[[ATTR7]] +; CHECK-NEXT: call void @use.internalized(i8* nofree [[C1]]) #[[ATTR7]] ; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[Z]], i64 24) #[[ATTR8]] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: @@ -188,9 +188,9 @@ ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree nounwind writeonly ; CHECK-LABEL: define {{[^@]+}}@use.internalized -; CHECK-SAME: (i8* nofree writeonly align 4 [[X:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-SAME: (i8* nofree [[X:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: store i8* [[X]], i8** @S, align 8 ; CHECK-NEXT: ret void @@ -210,8 +210,8 @@ ; CHECK-NEXT: ret i8* [[GEP]] ; ;. -; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn writeonly } -; CHECK: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readonly willreturn allocsize(0) } +; CHECK: attributes #[[ATTR0]] = { nofree nounwind writeonly } +; CHECK: attributes #[[ATTR1]] = { nosync nounwind readonly allocsize(0) } ; CHECK: attributes #[[ATTR2:[0-9]+]] = { nosync nounwind } ; CHECK: attributes #[[ATTR3:[0-9]+]] = { nounwind readnone speculatable } ; CHECK: attributes #[[ATTR4:[0-9]+]] = { nofree nosync nounwind readnone speculatable willreturn } diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll --- a/llvm/test/Transforms/OpenMP/spmdization.ll +++ b/llvm/test/Transforms/OpenMP/spmdization.ll @@ -178,9 +178,9 @@ ; AMDGPU: common.ret: ; AMDGPU-NEXT: ret void ; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2:[0-9]+]] +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] ; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] -; AMDGPU-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; AMDGPU-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) ; AMDGPU-NEXT: br label [[COMMON_RET]] ; @@ -195,9 +195,9 @@ ; NVPTX: common.ret: ; NVPTX-NEXT: ret void ; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2:[0-9]+]] +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] ; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] -; NVPTX-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; NVPTX-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) ; NVPTX-NEXT: br label [[COMMON_RET]] ; @@ -249,9 +249,9 @@ ; AMDGPU-DISABLED: common.ret: ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: user_code.entry: -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2:[0-9]+]] +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] ; @@ -302,9 +302,9 @@ ; NVPTX-DISABLED: common.ret: ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: user_code.entry: -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2:[0-9]+]] +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3:[0-9]+]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] ; @@ -330,7 +330,7 @@ ; Function Attrs: alwaysinline convergent norecurse nounwind define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__ -; AMDGPU-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 ; AMDGPU-NEXT: br label [[FOR_COND:%.*]] @@ -339,17 +339,17 @@ ; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 ; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] ; AMDGPU: for.cond.cleanup: -; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR5:[0-9]+]] +; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR6:[0-9]+]] ; AMDGPU-NEXT: ret void ; AMDGPU: for.body: ; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] ; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP1]], i64 0) ; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__ -; NVPTX-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 ; NVPTX-NEXT: br label [[FOR_COND:%.*]] @@ -358,17 +358,17 @@ ; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 ; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] ; NVPTX: for.cond.cleanup: -; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR5:[0-9]+]] +; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR6:[0-9]+]] ; NVPTX-NEXT: ret void ; NVPTX: for.body: ; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] ; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP1]], i64 0) ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__ -; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 ; AMDGPU-DISABLED-NEXT: br label [[FOR_COND:%.*]] @@ -377,17 +377,17 @@ ; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 ; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] ; AMDGPU-DISABLED: for.cond.cleanup: -; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR5:[0-9]+]] +; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6:[0-9]+]] ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: for.body: ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* noundef @__omp_outlined__1_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* @__omp_outlined__1_wrapper.ID, i8** [[TMP1]], i64 0) ; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__ -; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 ; NVPTX-DISABLED-NEXT: br label [[FOR_COND:%.*]] @@ -396,12 +396,12 @@ ; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 ; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] ; NVPTX-DISABLED: for.cond.cleanup: -; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR5:[0-9]+]] +; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6:[0-9]+]] ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: for.body: ; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* noundef @__omp_outlined__1_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* @__omp_outlined__1_wrapper.ID, i8** [[TMP1]], i64 0) ; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] ; @@ -429,27 +429,27 @@ ; Function Attrs: alwaysinline convergent norecurse nounwind define internal void @__omp_outlined__1(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1 -; AMDGPU-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: call void @unknown() #[[ATTR6:[0-9]+]] +; AMDGPU-NEXT: call void @unknown() #[[ATTR7:[0-9]+]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1 -; NVPTX-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: call void @unknown() #[[ATTR6:[0-9]+]] +; NVPTX-NEXT: call void @unknown() #[[ATTR7:[0-9]+]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1 -; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR6:[0-9]+]] +; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR7:[0-9]+]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1 -; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR6:[0-9]+]] +; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR7:[0-9]+]] ; NVPTX-DISABLED-NEXT: ret void ; entry: @@ -466,7 +466,7 @@ ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__1(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; AMDGPU-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper @@ -476,7 +476,7 @@ ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__1(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; NVPTX-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper @@ -486,7 +486,7 @@ ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper @@ -496,7 +496,7 @@ ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void ; entry: @@ -523,9 +523,9 @@ ; AMDGPU: common.ret: ; AMDGPU-NEXT: ret void ; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]] +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-NEXT: call void @__omp_outlined__2(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; AMDGPU-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) ; AMDGPU-NEXT: br label [[COMMON_RET]] ; @@ -540,9 +540,9 @@ ; NVPTX: common.ret: ; NVPTX-NEXT: ret void ; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]] +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; NVPTX-NEXT: call void @__omp_outlined__2(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; NVPTX-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) ; NVPTX-NEXT: br label [[COMMON_RET]] ; @@ -594,9 +594,9 @@ ; AMDGPU-DISABLED: common.ret: ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: user_code.entry: -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]] +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__2(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] ; @@ -647,9 +647,9 @@ ; NVPTX-DISABLED: common.ret: ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: user_code.entry: -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]] +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__2(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] ; @@ -675,92 +675,92 @@ ; Function Attrs: alwaysinline convergent norecurse nounwind define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2 -; AMDGPU-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 ; AMDGPU-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 4, addrspace(5) ; AMDGPU-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast i8 addrspace(5)* [[TMP0]] to i8* ; AMDGPU-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[MALLOC_CAST]] to i32* -; AMDGPU-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR5]] +; AMDGPU-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR6]] ; AMDGPU-NEXT: br label [[FOR_COND:%.*]] ; AMDGPU: for.cond: ; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] ; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 ; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] ; AMDGPU: for.cond.cleanup: -; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR5]] +; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR6]] ; AMDGPU-NEXT: ret void ; AMDGPU: for.body: ; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] ; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP2]], i64 0) ; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2 -; NVPTX-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 ; NVPTX-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 4 ; NVPTX-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[TMP0]] to i32* -; NVPTX-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR5]] +; NVPTX-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR6]] ; NVPTX-NEXT: br label [[FOR_COND:%.*]] ; NVPTX: for.cond: ; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] ; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 ; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] ; NVPTX: for.cond.cleanup: -; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR5]] +; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR6]] ; NVPTX-NEXT: ret void ; NVPTX: for.body: ; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] ; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP2]], i64 0) ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2 -; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 4, addrspace(5) ; AMDGPU-DISABLED-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast i8 addrspace(5)* [[TMP0]] to i8* ; AMDGPU-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[MALLOC_CAST]] to i32* -; AMDGPU-DISABLED-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR5]] +; AMDGPU-DISABLED-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR6]] ; AMDGPU-DISABLED-NEXT: br label [[FOR_COND:%.*]] ; AMDGPU-DISABLED: for.cond: ; AMDGPU-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] ; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 ; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] ; AMDGPU-DISABLED: for.cond.cleanup: -; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR5]] +; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6]] ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: for.body: ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] ; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef @__omp_outlined__3_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* @__omp_outlined__3_wrapper.ID, i8** [[TMP2]], i64 0) ; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2 -; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 ; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 4 ; NVPTX-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[TMP0]] to i32* -; NVPTX-DISABLED-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR5]] +; NVPTX-DISABLED-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR6]] ; NVPTX-DISABLED-NEXT: br label [[FOR_COND:%.*]] ; NVPTX-DISABLED: for.cond: ; NVPTX-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] ; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 ; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] ; NVPTX-DISABLED: for.cond.cleanup: -; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR5]] +; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6]] ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: for.body: ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] ; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef @__omp_outlined__3_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* @__omp_outlined__3_wrapper.ID, i8** [[TMP2]], i64 0) ; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] ; @@ -791,27 +791,27 @@ ; Function Attrs: alwaysinline convergent norecurse nounwind define internal void @__omp_outlined__3(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3 -; AMDGPU-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: call void @unknown() #[[ATTR6]] +; AMDGPU-NEXT: call void @unknown() #[[ATTR7]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3 -; NVPTX-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: call void @unknown() #[[ATTR6]] +; NVPTX-NEXT: call void @unknown() #[[ATTR7]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3 -; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR6]] +; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR7]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3 -; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR6]] +; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR7]] ; NVPTX-DISABLED-NEXT: ret void ; entry: @@ -828,7 +828,7 @@ ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__3(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; AMDGPU-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper @@ -838,7 +838,7 @@ ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__3(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; NVPTX-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper @@ -848,7 +848,7 @@ ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__3(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper @@ -858,7 +858,7 @@ ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__3(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void ; entry: @@ -886,9 +886,9 @@ ; AMDGPU: common.ret: ; AMDGPU-NEXT: ret void ; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]] +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; AMDGPU-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) ; AMDGPU-NEXT: br label [[COMMON_RET]] ; @@ -903,9 +903,9 @@ ; NVPTX: common.ret: ; NVPTX-NEXT: ret void ; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]] +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; NVPTX-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; NVPTX-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) ; NVPTX-NEXT: br label [[COMMON_RET]] ; @@ -957,9 +957,9 @@ ; AMDGPU-DISABLED: common.ret: ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: user_code.entry: -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]] +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] ; @@ -1010,9 +1010,9 @@ ; NVPTX-DISABLED: common.ret: ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: user_code.entry: -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]] +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__4(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] ; @@ -1038,7 +1038,7 @@ ; Function Attrs: alwaysinline convergent norecurse nounwind define internal void @__omp_outlined__4(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__4 -; AMDGPU-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 ; AMDGPU-NEXT: br label [[FOR_COND:%.*]] @@ -1047,19 +1047,19 @@ ; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 ; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] ; AMDGPU: for.cond.cleanup: -; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR5]] +; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR6]] ; AMDGPU-NEXT: ret void ; AMDGPU: for.body: ; AMDGPU-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 ; AMDGPU-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]] ; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] ; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 1) +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** [[TMP2]], i64 1) ; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__4 -; NVPTX-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 ; NVPTX-NEXT: br label [[FOR_COND:%.*]] @@ -1068,19 +1068,19 @@ ; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 ; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] ; NVPTX: for.cond.cleanup: -; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR5]] +; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR6]] ; NVPTX-NEXT: ret void ; NVPTX: for.body: ; NVPTX-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 ; NVPTX-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]] ; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] ; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 1) +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** [[TMP2]], i64 1) ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4 -; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 ; AMDGPU-DISABLED-NEXT: br label [[FOR_COND:%.*]] @@ -1089,19 +1089,19 @@ ; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 ; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] ; AMDGPU-DISABLED: for.cond.cleanup: -; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR5]] +; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6]] ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: for.body: ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 ; AMDGPU-DISABLED-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]] ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] ; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef @__omp_outlined__5_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 1) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* @__omp_outlined__5_wrapper.ID, i8** [[TMP2]], i64 1) ; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__4 -; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 ; NVPTX-DISABLED-NEXT: br label [[FOR_COND:%.*]] @@ -1110,14 +1110,14 @@ ; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 ; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] ; NVPTX-DISABLED: for.cond.cleanup: -; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR5]] +; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6]] ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: for.body: ; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 ; NVPTX-DISABLED-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]] ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] ; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef @__omp_outlined__5_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 1) +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* @__omp_outlined__5_wrapper.ID, i8** [[TMP2]], i64 1) ; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] ; @@ -1150,39 +1150,39 @@ ; Function Attrs: alwaysinline convergent norecurse nounwind define internal void @__omp_outlined__5(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* nonnull align 4 dereferenceable(4) %x) #0 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__5 -; AMDGPU-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]] ; AMDGPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 ; AMDGPU-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]] -; AMDGPU-NEXT: call void @unknown() #[[ATTR6]] +; AMDGPU-NEXT: call void @unknown() #[[ATTR7]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5 -; NVPTX-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]] ; NVPTX-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 ; NVPTX-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]] -; NVPTX-NEXT: call void @unknown() #[[ATTR6]] +; NVPTX-NEXT: call void @unknown() #[[ATTR7]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5 -; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]] ; AMDGPU-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 ; AMDGPU-DISABLED-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR6]] +; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR7]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5 -; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]] ; NVPTX-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 ; NVPTX-DISABLED-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR6]] +; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR7]] ; NVPTX-DISABLED-NEXT: ret void ; entry: @@ -1205,7 +1205,7 @@ ; AMDGPU-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 ; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** ; AMDGPU-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]] -; AMDGPU-NEXT: call void @__omp_outlined__5(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]], i32* nonnull align 4 dereferenceable(4) [[TMP4]]) #[[ATTR2]] +; AMDGPU-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper @@ -1218,7 +1218,7 @@ ; NVPTX-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 ; NVPTX-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** ; NVPTX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]] -; NVPTX-NEXT: call void @__omp_outlined__5(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]], i32* nonnull align 4 dereferenceable(4) [[TMP4]]) #[[ATTR2]] +; NVPTX-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper @@ -1231,7 +1231,7 @@ ; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 ; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** ; AMDGPU-DISABLED-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]] -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__5(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]], i32* nonnull align 4 dereferenceable(4) [[TMP4]]) #[[ATTR2]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper @@ -1244,7 +1244,7 @@ ; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 ; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** ; NVPTX-DISABLED-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]] -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__5(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]], i32* nonnull align 4 dereferenceable(4) [[TMP4]]) #[[ATTR2]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void ; entry: @@ -1274,9 +1274,9 @@ ; AMDGPU: common.ret: ; AMDGPU-NEXT: ret void ; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]] +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; AMDGPU-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) ; AMDGPU-NEXT: br label [[COMMON_RET]] ; @@ -1291,9 +1291,9 @@ ; NVPTX: common.ret: ; NVPTX-NEXT: ret void ; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]] +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; NVPTX-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; NVPTX-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) ; NVPTX-NEXT: br label [[COMMON_RET]] ; @@ -1345,9 +1345,9 @@ ; AMDGPU-DISABLED: common.ret: ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: user_code.entry: -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]] +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] ; @@ -1398,9 +1398,9 @@ ; NVPTX-DISABLED: common.ret: ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: user_code.entry: -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]] +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__6(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] ; @@ -1426,7 +1426,7 @@ ; Function Attrs: alwaysinline convergent norecurse nounwind define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__6 -; AMDGPU-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 ; AMDGPU-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared.1, i32 0, i32 0) to i8*) to i32* @@ -1450,19 +1450,19 @@ ; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 ; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] ; AMDGPU: for.cond.cleanup: -; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR5]] +; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR6]] ; AMDGPU-NEXT: ret void ; AMDGPU: for.body: ; AMDGPU-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 ; AMDGPU-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared.1, i32 0, i32 0) to i8*), i8** [[TMP2]], align 8, !tbaa [[TBAA26]] ; AMDGPU-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] ; AMDGPU-NEXT: [[TMP4:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP3]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** noundef [[TMP4]], i64 noundef 1) +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** [[TMP4]], i64 1) ; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__6 -; NVPTX-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 ; NVPTX-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared1, i32 0, i32 0) to i8*) to i32* @@ -1486,19 +1486,19 @@ ; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 ; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] ; NVPTX: for.cond.cleanup: -; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR5]] +; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR6]] ; NVPTX-NEXT: ret void ; NVPTX: for.body: ; NVPTX-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 ; NVPTX-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared1, i32 0, i32 0) to i8*), i8** [[TMP2]], align 8, !tbaa [[TBAA26]] ; NVPTX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] ; NVPTX-NEXT: [[TMP4:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP3]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** noundef [[TMP4]], i64 noundef 1) +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** [[TMP4]], i64 1) ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6 -; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 ; AMDGPU-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared.1, i32 0, i32 0) to i8*) to i32* @@ -1509,19 +1509,19 @@ ; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 ; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] ; AMDGPU-DISABLED: for.cond.cleanup: -; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR5]] +; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6]] ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: for.body: ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 ; AMDGPU-DISABLED-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared.1, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26]] ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] ; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef @__omp_outlined__7_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 1) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* @__omp_outlined__7_wrapper.ID, i8** [[TMP2]], i64 1) ; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__6 -; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 ; NVPTX-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared1, i32 0, i32 0) to i8*) to i32* @@ -1532,14 +1532,14 @@ ; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 ; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] ; NVPTX-DISABLED: for.cond.cleanup: -; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR5]] +; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6]] ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: for.body: ; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 ; NVPTX-DISABLED-NEXT: store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared1, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26]] ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] ; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef @__omp_outlined__7_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 1) +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* @__omp_outlined__7_wrapper.ID, i8** [[TMP2]], i64 1) ; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] ; @@ -1573,39 +1573,39 @@ ; Function Attrs: alwaysinline convergent norecurse nounwind define internal void @__omp_outlined__7(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* nonnull align 4 dereferenceable(4) %x) #0 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__7 -; AMDGPU-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]] ; AMDGPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 ; AMDGPU-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]] -; AMDGPU-NEXT: call void @unknown() #[[ATTR6]] +; AMDGPU-NEXT: call void @unknown() #[[ATTR7]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7 -; NVPTX-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]] ; NVPTX-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 ; NVPTX-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]] -; NVPTX-NEXT: call void @unknown() #[[ATTR6]] +; NVPTX-NEXT: call void @unknown() #[[ATTR7]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7 -; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]] ; AMDGPU-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 ; AMDGPU-DISABLED-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR6]] +; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR7]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7 -; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[X]], align 4, !tbaa [[TBAA18]] ; NVPTX-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 ; NVPTX-DISABLED-NEXT: store i32 [[INC]], i32* [[X]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR6]] +; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR7]] ; NVPTX-DISABLED-NEXT: ret void ; entry: @@ -1628,7 +1628,7 @@ ; AMDGPU-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 ; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** ; AMDGPU-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]] -; AMDGPU-NEXT: call void @__omp_outlined__7(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]], i32* nonnull align 4 dereferenceable(4) [[TMP4]]) #[[ATTR2]] +; AMDGPU-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper @@ -1641,7 +1641,7 @@ ; NVPTX-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 ; NVPTX-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** ; NVPTX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]] -; NVPTX-NEXT: call void @__omp_outlined__7(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]], i32* nonnull align 4 dereferenceable(4) [[TMP4]]) #[[ATTR2]] +; NVPTX-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper @@ -1654,7 +1654,7 @@ ; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 ; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** ; AMDGPU-DISABLED-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]] -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__7(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]], i32* nonnull align 4 dereferenceable(4) [[TMP4]]) #[[ATTR2]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper @@ -1667,7 +1667,7 @@ ; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 ; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32** ; NVPTX-DISABLED-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 8, !tbaa [[TBAA26]] -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__7(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]], i32* nonnull align 4 dereferenceable(4) [[TMP4]]) #[[ATTR2]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP4]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void ; entry: @@ -1728,8 +1728,8 @@ ; AMDGPU: common.ret: ; AMDGPU-NEXT: ret void ; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]] -; AMDGPU-NEXT: call void @__omp_outlined__8(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-NEXT: br label [[COMMON_RET]] ; @@ -1774,8 +1774,8 @@ ; NVPTX: common.ret: ; NVPTX-NEXT: ret void ; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]] -; NVPTX-NEXT: call void @__omp_outlined__8(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; NVPTX-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-NEXT: br label [[COMMON_RET]] ; @@ -1821,8 +1821,8 @@ ; AMDGPU-DISABLED: common.ret: ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: user_code.entry: -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]] -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__8(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] ; @@ -1867,8 +1867,8 @@ ; NVPTX-DISABLED: common.ret: ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: user_code.entry: -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]] -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__8(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] ; @@ -1894,27 +1894,27 @@ ; Function Attrs: alwaysinline convergent norecurse nounwind define internal void @__omp_outlined__8(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8 -; AMDGPU-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: call void @unknown() #[[ATTR6]] +; AMDGPU-NEXT: call void @unknown() #[[ATTR7]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__8 -; NVPTX-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: call void @unknown() #[[ATTR6]] +; NVPTX-NEXT: call void @unknown() #[[ATTR7]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8 -; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR6]] +; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR7]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8 -; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR6]] +; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR7]] ; NVPTX-DISABLED-NEXT: ret void ; entry: @@ -1971,10 +1971,10 @@ ; AMDGPU: common.ret: ; AMDGPU-NEXT: ret void ; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]] -; AMDGPU-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @"_omp_task_entry$" to i32 (i32, i8*)*)) #[[ATTR2]] +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @"_omp_task_entry$" to i32 (i32, i8*)*)) #[[ATTR3]] ; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.kmp_task_t_with_privates* -; AMDGPU-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) #[[ATTR2]] +; AMDGPU-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) #[[ATTR3]] ; AMDGPU-NEXT: [[TMP5:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* @__omp_outlined__9_wrapper.ID, i8** [[TMP5]], i64 0) ; AMDGPU-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -2026,10 +2026,10 @@ ; NVPTX: common.ret: ; NVPTX-NEXT: ret void ; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]] -; NVPTX-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @"_omp_task_entry$" to i32 (i32, i8*)*)) #[[ATTR2]] +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; NVPTX-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @"_omp_task_entry$" to i32 (i32, i8*)*)) #[[ATTR3]] ; NVPTX-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.kmp_task_t_with_privates* -; NVPTX-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) #[[ATTR2]] +; NVPTX-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) #[[ATTR3]] ; NVPTX-NEXT: [[TMP5:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* @__omp_outlined__9_wrapper.ID, i8** [[TMP5]], i64 0) ; NVPTX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -2082,10 +2082,10 @@ ; AMDGPU-DISABLED: common.ret: ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: user_code.entry: -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]] -; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @"_omp_task_entry$" to i32 (i32, i8*)*)) #[[ATTR2]] +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @"_omp_task_entry$" to i32 (i32, i8*)*)) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.kmp_task_t_with_privates* -; AMDGPU-DISABLED-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) #[[ATTR2]] +; AMDGPU-DISABLED-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: [[TMP5:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* @__omp_outlined__9_wrapper.ID, i8** [[TMP5]], i64 0) ; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -2137,10 +2137,10 @@ ; NVPTX-DISABLED: common.ret: ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: user_code.entry: -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]] -; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @"_omp_task_entry$" to i32 (i32, i8*)*)) #[[ATTR2]] +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR3]] +; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @"_omp_task_entry$" to i32 (i32, i8*)*)) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.kmp_task_t_with_privates* -; NVPTX-DISABLED-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) #[[ATTR2]] +; NVPTX-DISABLED-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP2]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: [[TMP5:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__9 to i8*), i8* @__omp_outlined__9_wrapper.ID, i8** [[TMP5]], i64 0) ; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -2169,27 +2169,27 @@ ; Function Attrs: alwaysinline convergent nounwind define internal void @.omp_outlined.(i32 %.global_tid., i32* noalias %.part_id., i8* noalias %.privates., void (i8*, ...)* noalias %.copy_fn., i8* %.task_t., %struct.anon* noalias %__context) #9 { ; AMDGPU-LABEL: define {{[^@]+}}@.omp_outlined. -; AMDGPU-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone [[DOTPART_ID_:%.*]], i8* noalias nocapture nofree readnone align 4294967296 [[DOTPRIVATES_:%.*]], void (i8*, ...)* noalias nocapture nofree readnone align 4294967296 [[DOTCOPY_FN_:%.*]], i8* noalias nocapture nofree nonnull readnone align 8 dereferenceable(8) [[DOTTASK_T_:%.*]], %struct.anon* noalias nocapture nofree readnone [[__CONTEXT:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTPART_ID_:%.*]], i8* noalias [[DOTPRIVATES_:%.*]], void (i8*, ...)* noalias [[DOTCOPY_FN_:%.*]], i8* [[DOTTASK_T_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR5]] +; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR6]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@.omp_outlined. -; NVPTX-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone [[DOTPART_ID_:%.*]], i8* noalias nocapture nofree readnone align 4294967296 [[DOTPRIVATES_:%.*]], void (i8*, ...)* noalias nocapture nofree readnone align 4294967296 [[DOTCOPY_FN_:%.*]], i8* noalias nocapture nofree nonnull readnone align 8 dereferenceable(8) [[DOTTASK_T_:%.*]], %struct.anon* noalias nocapture nofree readnone [[__CONTEXT:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTPART_ID_:%.*]], i8* noalias [[DOTPRIVATES_:%.*]], void (i8*, ...)* noalias [[DOTCOPY_FN_:%.*]], i8* [[DOTTASK_T_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR5]] +; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR6]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@.omp_outlined. -; AMDGPU-DISABLED-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone [[DOTPART_ID_:%.*]], i8* noalias nocapture nofree readnone align 4294967296 [[DOTPRIVATES_:%.*]], void (i8*, ...)* noalias nocapture nofree readnone align 4294967296 [[DOTCOPY_FN_:%.*]], i8* noalias nocapture nofree nonnull readnone align 8 dereferenceable(8) [[DOTTASK_T_:%.*]], %struct.anon* noalias nocapture nofree readnone [[__CONTEXT:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTPART_ID_:%.*]], i8* noalias [[DOTPRIVATES_:%.*]], void (i8*, ...)* noalias [[DOTCOPY_FN_:%.*]], i8* [[DOTTASK_T_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR5]] +; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@.omp_outlined. -; NVPTX-DISABLED-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone [[DOTPART_ID_:%.*]], i8* noalias nocapture nofree readnone align 4294967296 [[DOTPRIVATES_:%.*]], void (i8*, ...)* noalias nocapture nofree readnone align 4294967296 [[DOTCOPY_FN_:%.*]], i8* noalias nocapture nofree nonnull readnone align 8 dereferenceable(8) [[DOTTASK_T_:%.*]], %struct.anon* noalias nocapture nofree readnone [[__CONTEXT:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTPART_ID_:%.*]], i8* noalias [[DOTPRIVATES_:%.*]], void (i8*, ...)* noalias [[DOTCOPY_FN_:%.*]], i8* [[DOTTASK_T_:%.*]], %struct.anon* noalias [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR5]] +; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR6]] ; NVPTX-DISABLED-NEXT: ret void ; entry: @@ -2253,27 +2253,27 @@ ; Function Attrs: alwaysinline convergent norecurse nounwind define internal void @__omp_outlined__9(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9 -; AMDGPU-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: call void @unknown() #[[ATTR6]] +; AMDGPU-NEXT: call void @unknown() #[[ATTR7]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9 -; NVPTX-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: call void @unknown() #[[ATTR6]] +; NVPTX-NEXT: call void @unknown() #[[ATTR7]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9 -; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR6]] +; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR7]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9 -; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR6]] +; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR7]] ; NVPTX-DISABLED-NEXT: ret void ; entry: @@ -2290,7 +2290,7 @@ ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__9(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; AMDGPU-NEXT: call void @__omp_outlined__9(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper @@ -2300,7 +2300,7 @@ ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__9(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; NVPTX-NEXT: call void @__omp_outlined__9(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper @@ -2310,7 +2310,7 @@ ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper @@ -2320,7 +2320,7 @@ ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] ; NVPTX-DISABLED-NEXT: ret void ; entry: @@ -2390,47 +2390,51 @@ ;. ; AMDGPU: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind } ; AMDGPU: attributes #[[ATTR1]] = { convergent norecurse nounwind } -; AMDGPU: attributes #[[ATTR2]] = { nounwind } -; AMDGPU: attributes #[[ATTR3:[0-9]+]] = { nosync nounwind } -; AMDGPU: attributes #[[ATTR4:[0-9]+]] = { nofree nosync nounwind allocsize(0) } -; AMDGPU: attributes #[[ATTR5]] = { convergent "llvm.assume"="ompx_spmd_amenable" } -; AMDGPU: attributes #[[ATTR6]] = { convergent } -; AMDGPU: attributes #[[ATTR7:[0-9]+]] = { argmemonly nofree nosync nounwind willreturn } -; AMDGPU: attributes #[[ATTR8:[0-9]+]] = { alwaysinline } -; AMDGPU: attributes #[[ATTR9:[0-9]+]] = { convergent nounwind } +; AMDGPU: attributes #[[ATTR2]] = { alwaysinline convergent nounwind } +; AMDGPU: attributes #[[ATTR3]] = { nounwind } +; AMDGPU: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind } +; AMDGPU: attributes #[[ATTR5:[0-9]+]] = { nofree nosync nounwind allocsize(0) } +; AMDGPU: attributes #[[ATTR6]] = { convergent "llvm.assume"="ompx_spmd_amenable" } +; AMDGPU: attributes #[[ATTR7]] = { convergent } +; AMDGPU: attributes #[[ATTR8:[0-9]+]] = { argmemonly nofree nosync nounwind willreturn } +; AMDGPU: attributes #[[ATTR9:[0-9]+]] = { alwaysinline } +; AMDGPU: attributes #[[ATTR10:[0-9]+]] = { convergent nounwind } ;. ; NVPTX: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind } ; NVPTX: attributes #[[ATTR1]] = { convergent norecurse nounwind } -; NVPTX: attributes #[[ATTR2]] = { nounwind } -; NVPTX: attributes #[[ATTR3:[0-9]+]] = { nosync nounwind } -; NVPTX: attributes #[[ATTR4:[0-9]+]] = { nofree nosync nounwind allocsize(0) } -; NVPTX: attributes #[[ATTR5]] = { convergent "llvm.assume"="ompx_spmd_amenable" } -; NVPTX: attributes #[[ATTR6]] = { convergent } -; NVPTX: attributes #[[ATTR7:[0-9]+]] = { argmemonly nofree nosync nounwind willreturn } -; NVPTX: attributes #[[ATTR8:[0-9]+]] = { alwaysinline } -; NVPTX: attributes #[[ATTR9:[0-9]+]] = { convergent nounwind } +; NVPTX: attributes #[[ATTR2]] = { alwaysinline convergent nounwind } +; NVPTX: attributes #[[ATTR3]] = { nounwind } +; NVPTX: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind } +; NVPTX: attributes #[[ATTR5:[0-9]+]] = { nofree nosync nounwind allocsize(0) } +; NVPTX: attributes #[[ATTR6]] = { convergent "llvm.assume"="ompx_spmd_amenable" } +; NVPTX: attributes #[[ATTR7]] = { convergent } +; NVPTX: attributes #[[ATTR8:[0-9]+]] = { argmemonly nofree nosync nounwind willreturn } +; NVPTX: attributes #[[ATTR9:[0-9]+]] = { alwaysinline } +; NVPTX: attributes #[[ATTR10:[0-9]+]] = { convergent nounwind } ;. ; AMDGPU-DISABLED: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind } ; AMDGPU-DISABLED: attributes #[[ATTR1]] = { convergent norecurse nounwind } -; AMDGPU-DISABLED: attributes #[[ATTR2]] = { nounwind } -; AMDGPU-DISABLED: attributes #[[ATTR3:[0-9]+]] = { nosync nounwind } -; AMDGPU-DISABLED: attributes #[[ATTR4:[0-9]+]] = { nofree nosync nounwind allocsize(0) } -; AMDGPU-DISABLED: attributes #[[ATTR5]] = { convergent "llvm.assume"="ompx_spmd_amenable" } -; AMDGPU-DISABLED: attributes #[[ATTR6]] = { convergent } -; AMDGPU-DISABLED: attributes #[[ATTR7:[0-9]+]] = { argmemonly nofree nosync nounwind willreturn } -; AMDGPU-DISABLED: attributes #[[ATTR8:[0-9]+]] = { alwaysinline } -; AMDGPU-DISABLED: attributes #[[ATTR9:[0-9]+]] = { convergent nounwind } +; AMDGPU-DISABLED: attributes #[[ATTR2]] = { alwaysinline convergent nounwind } +; AMDGPU-DISABLED: attributes #[[ATTR3]] = { nounwind } +; AMDGPU-DISABLED: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind } +; AMDGPU-DISABLED: attributes #[[ATTR5:[0-9]+]] = { nofree nosync nounwind allocsize(0) } +; AMDGPU-DISABLED: attributes #[[ATTR6]] = { convergent "llvm.assume"="ompx_spmd_amenable" } +; AMDGPU-DISABLED: attributes #[[ATTR7]] = { convergent } +; AMDGPU-DISABLED: attributes #[[ATTR8:[0-9]+]] = { argmemonly nofree nosync nounwind willreturn } +; AMDGPU-DISABLED: attributes #[[ATTR9:[0-9]+]] = { alwaysinline } +; AMDGPU-DISABLED: attributes #[[ATTR10:[0-9]+]] = { convergent nounwind } ;. ; NVPTX-DISABLED: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind } ; NVPTX-DISABLED: attributes #[[ATTR1]] = { convergent norecurse nounwind } -; NVPTX-DISABLED: attributes #[[ATTR2]] = { nounwind } -; NVPTX-DISABLED: attributes #[[ATTR3:[0-9]+]] = { nosync nounwind } -; NVPTX-DISABLED: attributes #[[ATTR4:[0-9]+]] = { nofree nosync nounwind allocsize(0) } -; NVPTX-DISABLED: attributes #[[ATTR5]] = { convergent "llvm.assume"="ompx_spmd_amenable" } -; NVPTX-DISABLED: attributes #[[ATTR6]] = { convergent } -; NVPTX-DISABLED: attributes #[[ATTR7:[0-9]+]] = { argmemonly nofree nosync nounwind willreturn } -; NVPTX-DISABLED: attributes #[[ATTR8:[0-9]+]] = { alwaysinline } -; NVPTX-DISABLED: attributes #[[ATTR9:[0-9]+]] = { convergent nounwind } +; NVPTX-DISABLED: attributes #[[ATTR2]] = { alwaysinline convergent nounwind } +; NVPTX-DISABLED: attributes #[[ATTR3]] = { nounwind } +; NVPTX-DISABLED: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind } +; NVPTX-DISABLED: attributes #[[ATTR5:[0-9]+]] = { nofree nosync nounwind allocsize(0) } +; NVPTX-DISABLED: attributes #[[ATTR6]] = { convergent "llvm.assume"="ompx_spmd_amenable" } +; NVPTX-DISABLED: attributes #[[ATTR7]] = { convergent } +; NVPTX-DISABLED: attributes #[[ATTR8:[0-9]+]] = { argmemonly nofree nosync nounwind willreturn } +; NVPTX-DISABLED: attributes #[[ATTR9:[0-9]+]] = { alwaysinline } +; NVPTX-DISABLED: attributes #[[ATTR10:[0-9]+]] = { convergent nounwind } ;. ; AMDGPU: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5} ; AMDGPU: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1} diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll --- a/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll @@ -91,7 +91,7 @@ ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: -; CHECK-NEXT: call void @spmd_helper() #[[ATTR4:[0-9]+]] +; CHECK-NEXT: call void @spmd_helper() #[[ATTR5:[0-9]+]] ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; CHECK-NEXT: ret void ; CHECK: worker.exit: @@ -124,7 +124,7 @@ ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: -; CHECK-NEXT: call void @generic_helper() #[[ATTR5:[0-9]+]] +; CHECK-NEXT: call void @generic_helper() #[[ATTR6:[0-9]+]] ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; CHECK-NEXT: ret void ; CHECK: worker.exit: @@ -147,13 +147,13 @@ ; Function Attrs: convergent noinline nounwind define internal void @spmd_helper() #1 { ; CHECK-LABEL: define {{[^@]+}}@spmd_helper -; CHECK-SAME: () #[[ATTR0]] { +; CHECK-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; CHECK-NEXT: call void @leaf() #[[ATTR6:[0-9]+]] -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR1:[0-9]+]] +; CHECK-NEXT: call void @leaf() #[[ATTR6]] +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR2:[0-9]+]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* noundef @__omp_outlined___wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* @__omp_outlined___wrapper.ID, i8** [[TMP1]], i64 0) ; CHECK-NEXT: ret void ; entry: @@ -168,7 +168,7 @@ ; Function Attrs: convergent noinline norecurse nounwind define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { ; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__ -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -194,7 +194,7 @@ ; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 ; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -; CHECK-NEXT: call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTADDR1]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR1]] +; CHECK-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] ; CHECK-NEXT: ret void ; entry: @@ -221,7 +221,7 @@ ; Function Attrs: convergent noinline nounwind define internal void @leaf() #1 { ; CHECK-LABEL: define {{[^@]+}}@leaf -; CHECK-SAME: () #[[ATTR3:[0-9]+]] { +; CHECK-SAME: () #[[ATTR4:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: store i32 42, i32* @G, align 4 ; CHECK-NEXT: ret void @@ -234,7 +234,7 @@ ; Function Attrs: convergent noinline nounwind define internal void @generic_helper() #1 { ; CHECK-LABEL: define {{[^@]+}}@generic_helper -; CHECK-SAME: () #[[ATTR3]] { +; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: call void @leaf() #[[ATTR6]] ; CHECK-NEXT: ret void @@ -270,12 +270,12 @@ !9 = !{!"clang version 14.0.0"} ;. ; CHECK: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } -; CHECK: attributes #[[ATTR1]] = { nounwind } -; CHECK: attributes #[[ATTR2:[0-9]+]] = { alwaysinline } -; CHECK: attributes #[[ATTR3]] = { convergent nofree noinline norecurse nosync nounwind willreturn writeonly "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } -; CHECK: attributes #[[ATTR4]] = { convergent nounwind } -; CHECK: attributes #[[ATTR5]] = { convergent nounwind writeonly } -; CHECK: attributes #[[ATTR6]] = { convergent nofree nosync nounwind willreturn writeonly } +; CHECK: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } +; CHECK: attributes #[[ATTR2]] = { nounwind } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { alwaysinline } +; CHECK: attributes #[[ATTR4]] = { convergent noinline nounwind writeonly "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } +; CHECK: attributes #[[ATTR5]] = { convergent nounwind } +; CHECK: attributes #[[ATTR6]] = { convergent nounwind writeonly } ; CHECK: attributes #[[ATTR7]] = { convergent } ;. ; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"spmd", i32 12, i32 0}