diff --git a/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h b/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h --- a/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h +++ b/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h @@ -33,11 +33,6 @@ bool isKnown() { return Value != OpenMP::UNKNOWN; } operator bool() { return Value != OpenMP::NOT_FOUND; } - /// Does this function \p F contain any OpenMP runtime calls? - bool containsOMPRuntimeCalls(Function *F) const { - return FuncsWithOMPRuntimeCalls.contains(F); - } - /// Return the known kernels (=GPU entry points) in the module. SmallPtrSetImpl &getKernels() { return Kernels; } @@ -49,9 +44,6 @@ friend bool containsOpenMP(Module &M, OpenMPInModule &OMPInModule); - /// In which functions are OpenMP runtime calls present? - SmallPtrSet FuncsWithOMPRuntimeCalls; - /// Collection of known kernels (=GPU entry points) in the module. SmallPtrSet Kernels; }; diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -2514,20 +2514,12 @@ SmallVector SCC; // If there are kernels in the module, we have to run on all SCC's. - bool SCCIsInteresting = !OMPInModule.getKernels().empty(); for (LazyCallGraph::Node &N : C) { Function *Fn = &N.getFunction(); SCC.push_back(Fn); - - // Do we already know that the SCC contains kernels, - // or that OpenMP functions are called from this SCC? - if (SCCIsInteresting) - continue; - // If not, let's check that. - SCCIsInteresting |= OMPInModule.containsOMPRuntimeCalls(Fn); } - if (!SCCIsInteresting || SCC.empty()) + if (SCC.empty()) return PreservedAnalyses::all(); FunctionAnalysisManager &FAM = @@ -2585,22 +2577,14 @@ SmallVector SCC; // If there are kernels in the module, we have to run on all SCC's. - bool SCCIsInteresting = !OMPInModule.getKernels().empty(); for (CallGraphNode *CGN : CGSCC) { Function *Fn = CGN->getFunction(); if (!Fn || Fn->isDeclaration()) continue; SCC.push_back(Fn); - - // Do we already know that the SCC contains kernels, - // or that OpenMP functions are called from this SCC? - if (SCCIsInteresting) - continue; - // If not, let's check that. - SCCIsInteresting |= OMPInModule.containsOMPRuntimeCalls(Fn); } - if (!SCCIsInteresting || SCC.empty()) + if (SCC.empty()) return false; CallGraph &CG = getAnalysis().getCallGraph(); @@ -2661,32 +2645,18 @@ if (OMPInModule.isKnown()) return OMPInModule; - auto RecordFunctionsContainingUsesOf = [&](Function *F) { - for (User *U : F->users()) - if (auto *I = dyn_cast(U)) - OMPInModule.FuncsWithOMPRuntimeCalls.insert(I->getFunction()); - }; - - // MSVC doesn't like long if-else chains for some reason and instead just - // issues an error. Work around it.. - do { -#define OMP_RTL(_Enum, _Name, ...) \ - if (Function *F = M.getFunction(_Name)) { \ - RecordFunctionsContainingUsesOf(F); \ - OMPInModule = true; \ - } -#include "llvm/Frontend/OpenMP/OMPKinds.def" - } while (false); + Metadata *MD = M.getModuleFlag("openmp"); + if (!MD) + return OMPInModule = false; + OMPInModule = true; // Identify kernels once. TODO: We should split the OMPInformationCache into a // module and an SCC part. The kernel information, among other things, could // go into the module part. - if (OMPInModule.isKnown() && OMPInModule) { + if (OMPInModule.isKnown() && OMPInModule) OMPInModule.identifyKernels(M); - return true; - } - return OMPInModule = false; + return true; } char OpenMPOptCGSCCLegacyPass::ID = 0; diff --git a/llvm/test/Transforms/OpenMP/add_attributes.ll b/llvm/test/Transforms/OpenMP/add_attributes.ll --- a/llvm/test/Transforms/OpenMP/add_attributes.ll +++ b/llvm/test/Transforms/OpenMP/add_attributes.ll @@ -1739,3 +1739,6 @@ ; OPTIMISTIC: ; Function Attrs: convergent noinline nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_barrier_simple_spmd(%struct.ident_t* nocapture nofree readonly, i32) +!llvm.module.flags = !{!0} + +!0 = !{i32 7, !"openmp", i32 50} diff --git a/llvm/test/Transforms/OpenMP/add_attributes_amdgcn.ll b/llvm/test/Transforms/OpenMP/add_attributes_amdgcn.ll --- a/llvm/test/Transforms/OpenMP/add_attributes_amdgcn.ll +++ b/llvm/test/Transforms/OpenMP/add_attributes_amdgcn.ll @@ -26,3 +26,7 @@ ; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_syncwarp(i64) + +!llvm.module.flags = !{!0} + +!0 = !{i32 7, !"openmp", i32 50} diff --git a/llvm/test/Transforms/OpenMP/deduplication.ll b/llvm/test/Transforms/OpenMP/deduplication.ll --- a/llvm/test/Transforms/OpenMP/deduplication.ll +++ b/llvm/test/Transforms/OpenMP/deduplication.ll @@ -221,3 +221,7 @@ call void @useI32(i32 %tid5) ret void } + +!llvm.module.flags = !{!0} + +!0 = !{i32 7, !"openmp", i32 50} diff --git a/llvm/test/Transforms/OpenMP/deduplication_remarks.ll b/llvm/test/Transforms/OpenMP/deduplication_remarks.ll --- a/llvm/test/Transforms/OpenMP/deduplication_remarks.ll +++ b/llvm/test/Transforms/OpenMP/deduplication_remarks.ll @@ -30,7 +30,7 @@ declare void @llvm.dbg.value(metadata, metadata, metadata) !llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!8, !9, !10, !11, !12} +!llvm.module.flags = !{!8, !9, !10, !11, !12, !29} !llvm.ident = !{!13} !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 10.0.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, splitDebugInlining: false, nameTableKind: None) @@ -62,3 +62,4 @@ !26 = !DILocation(line: 9, column: 10, scope: !14) !27 = !DILocation(line: 10, column: 2, scope: !14) !28 = !DILocation(line: 13, column: 1, scope: !14) +!29 = !{i32 7, !"openmp", i32 50} diff --git a/llvm/test/Transforms/OpenMP/globalization_remarks.ll b/llvm/test/Transforms/OpenMP/globalization_remarks.ll --- a/llvm/test/Transforms/OpenMP/globalization_remarks.ll +++ b/llvm/test/Transforms/OpenMP/globalization_remarks.ll @@ -107,7 +107,7 @@ !llvm.dbg.cu = !{!0} !omp_offload.info = !{!3} !nvvm.annotations = !{!4} -!llvm.module.flags = !{!5, !6, !7, !8} +!llvm.module.flags = !{!5, !6, !7, !8, !32} !llvm.ident = !{!9} !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 12.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: DebugDirectivesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None) @@ -142,4 +142,4 @@ !29 = !DILocation(line: 5, column: 26, scope: !21, inlinedAt: !30) !30 = distinct !DILocation(line: 9, column: 10, scope: !15) !31 = !DILocation(line: 10, column: 1, scope: !15) - +!32 = !{i32 7, !"openmp", i32 50} diff --git a/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll b/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll --- a/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll +++ b/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll @@ -19,9 +19,11 @@ ; Needed to trigger the openmp-opt pass declare dso_local void @__kmpc_kernel_prepare_parallel(i8*) +!llvm.module.flags = !{!4} !nvvm.annotations = !{!2, !0, !1, !3, !1, !2} !0 = !{void ()* @kernel1, !"kernel", i32 1} !1 = !{void ()* @non_kernel, !"non_kernel", i32 1} !2 = !{null, !"align", i32 1} !3 = !{void ()* @kernel2, !"kernel", i32 1} +!4 = !{i32 7, !"openmp", i32 50} diff --git a/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll b/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll --- a/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll +++ b/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll @@ -274,6 +274,8 @@ declare void @__kmpc_kernel_end_parallel() +!llvm.module.flags = !{!0} !nvvm.annotations = !{!1} +!0 = !{i32 7, !"openmp", i32 50} !1 = !{void ()* @__omp_offloading_50_6dfa0f01_foo_l6, !"kernel", i32 1} diff --git a/llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll b/llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll --- a/llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll +++ b/llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll @@ -504,3 +504,7 @@ ; CHECK: declare void @__tgt_target_data_begin_mapper_issue(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**, %struct.__tgt_async_info*) ; CHECK: declare void @__tgt_target_data_begin_mapper_wait(i64, %struct.__tgt_async_info*) + +!llvm.module.flags = !{!0} + +!0 = !{i32 7, !"openmp", i32 50} diff --git a/llvm/test/Transforms/OpenMP/icv_remarks.ll b/llvm/test/Transforms/OpenMP/icv_remarks.ll --- a/llvm/test/Transforms/OpenMP/icv_remarks.ll +++ b/llvm/test/Transforms/OpenMP/icv_remarks.ll @@ -67,7 +67,7 @@ attributes #5 = { nounwind readnone speculatable willreturn } !llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!13, !14, !15} +!llvm.module.flags = !{!13, !14, !15, !59} !llvm.ident = !{!16} !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 11.0.0 (https://github.com/llvm/llvm-project.git 73cea83a6f5ab521edf3cccfc603534776d691ec)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, splitDebugInlining: false, nameTableKind: None) @@ -129,3 +129,4 @@ !56 = !DILocation(line: 18, column: 1, scope: !33) !57 = !{!58} !58 = !{i64 2, i64 -1, i64 -1, i1 true} +!59 = !{i32 7, !"openmp", i32 50} diff --git a/llvm/test/Transforms/OpenMP/icv_tracking.ll b/llvm/test/Transforms/OpenMP/icv_tracking.ll --- a/llvm/test/Transforms/OpenMP/icv_tracking.ll +++ b/llvm/test/Transforms/OpenMP/icv_tracking.ll @@ -675,5 +675,8 @@ declare i32 @__gxx_personality_v0(...) +!llvm.module.flags = !{!2} + !0 = !{!1} !1 = !{i64 2, i64 -1, i64 -1, i1 true} +!2 = !{i32 7, !"openmp", i32 50} diff --git a/llvm/test/Transforms/OpenMP/parallel_deletion.ll b/llvm/test/Transforms/OpenMP/parallel_deletion.ll --- a/llvm/test/Transforms/OpenMP/parallel_deletion.ll +++ b/llvm/test/Transforms/OpenMP/parallel_deletion.ll @@ -1,7 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature ; RUN: opt -S -attributor -openmp-opt-cgscc < %s | FileCheck %s ; RUN: opt -S -passes='attributor,cgscc(openmp-opt-cgscc)' < %s | FileCheck %s -; target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" %struct.ident_t = type { i32, i32, i32, i32, i8* } @@ -22,12 +21,11 @@ ; #pragma omp parallel ; {} ; } -; ; We delete all but the first of the parallel regions in this test. define void @delete_parallel_0() { ; CHECK-LABEL: define {{[^@]+}}@delete_parallel_0() { ; CHECK-NEXT: entry: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0:@.*]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined.willreturn to void (i32*, i32*, ...)*)) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0:[0-9]+]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined.willreturn to void (i32*, i32*, ...)*)) ; CHECK-NEXT: ret void ; entry: @@ -40,9 +38,9 @@ define internal void @.omp_outlined.willreturn(i32* noalias %.global_tid., i32* noalias %.bound_tid.) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.willreturn -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) [[ATTR0:#.*]] { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @unknown() [[ATTR0]] +; CHECK-NEXT: call void @unknown() #[[ATTR0]] ; CHECK-NEXT: ret void ; entry: @@ -52,9 +50,9 @@ define internal void @.omp_outlined.willreturn.0(i32* noalias %.global_tid., i32* noalias %.bound_tid.) willreturn { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.0 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) [[ATTR1:#.*]] { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @readonly() [[ATTR4:#.*]] +; CHECK-NEXT: call void @readonly() #[[ATTR4:[0-9]+]] ; CHECK-NEXT: ret void ; entry: @@ -64,9 +62,9 @@ define internal void @.omp_outlined.willreturn.1(i32* noalias %.global_tid., i32* noalias %.bound_tid.) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.1 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) [[ATTR2:#.*]] { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @readnone() [[ATTR0]] +; CHECK-NEXT: call void @readnone() #[[ATTR0]] ; CHECK-NEXT: ret void ; entry: @@ -76,7 +74,7 @@ define internal void @.omp_outlined.willreturn.2(i32* noalias %.global_tid., i32* noalias %.bound_tid.) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.2 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) [[ATTR3:#.*]] { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: ret void ; @@ -94,14 +92,13 @@ ; #pragma omp parallel ; {} ; } -; ; We delete only the last parallel regions in this test because the others might not return. define void @delete_parallel_1() { ; CHECK-LABEL: define {{[^@]+}}@delete_parallel_1() { ; CHECK-NEXT: entry: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined..0 to void (i32*, i32*, ...)*)) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined..0 to void (i32*, i32*, ...)*)) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) ; CHECK-NEXT: ret void ; entry: @@ -126,9 +123,9 @@ define internal void @.omp_outlined..0(i32* noalias %.global_tid., i32* noalias %.bound_tid.) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..0 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) [[ATTR4]] { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @readonly() [[ATTR4]] +; CHECK-NEXT: call void @readonly() #[[ATTR4]] ; CHECK-NEXT: ret void ; entry: @@ -138,7 +135,7 @@ define internal void @.omp_outlined..1(i32* noalias %.global_tid., i32* noalias %.bound_tid.) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) [[ATTR5:#.*]] { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: call void @readnone() ; CHECK-NEXT: ret void @@ -150,7 +147,7 @@ define internal void @.omp_outlined..2(i32* noalias %.global_tid., i32* noalias %.bound_tid.) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) [[ATTR3]] { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: ret void ; @@ -180,7 +177,6 @@ ; ++a; ; } ; } -; ; FIXME: We do not realize that `a` is dead and all accesses to it can be removed ; making the parallel regions readonly and deletable. define void @delete_parallel_2() { @@ -188,12 +184,12 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[TMP:%.*]] = bitcast i32* [[A]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP]]) [[ATTR0]] +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 dereferenceable(4) [[TMP]]) #[[ATTR0]] ; CHECK-NEXT: store i32 0, i32* [[A]], align 4 -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A]]) ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to i8* ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP1]]) ; CHECK-NEXT: ret void @@ -214,9 +210,9 @@ define internal void @.omp_outlined..3(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) %a) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..3 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) [[ATTR6:#.*]] { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR6:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() [[ATTR12:#.*]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR14:[0-9]+]] ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 ; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; CHECK: if.then: @@ -247,14 +243,14 @@ ; CHECK-SAME: (i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* noundef nonnull [[GLOB0]], i32 [[TMP]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_IF_END:%.*]], label [[OMP_IF_THEN:%.*]] ; CHECK: omp_if.then: ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 ; CHECK-NEXT: store i32 [[INC]], i32* [[A]], align 4 -; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* noundef nonnull [[GLOB0]], i32 [[TMP]]) +; CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]]) ; CHECK-NEXT: br label [[OMP_IF_END]] ; CHECK: omp_if.end: ; CHECK-NEXT: ret void @@ -288,19 +284,19 @@ ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..5 ; CHECK-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull [[GLOB0]]) [[ATTR12]] +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @[[GLOB0]]) #[[ATTR14]] ; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef nonnull [[GLOB0]], i32 [[TMP]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_IF_END:%.*]], label [[OMP_IF_THEN:%.*]] ; CHECK: omp_if.then: ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 ; CHECK-NEXT: store i32 [[INC]], i32* [[A]], align 4 -; CHECK-NEXT: call void @__kmpc_end_single(%struct.ident_t* noundef nonnull [[GLOB0]], i32 [[TMP]]) +; CHECK-NEXT: call void @__kmpc_end_single(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]]) ; CHECK-NEXT: br label [[OMP_IF_END]] ; CHECK: omp_if.end: -; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* noundef nonnull [[GLOB1:@.*]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* noundef nonnull @[[GLOB1:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK-NEXT: ret void ; entry: @@ -329,13 +325,13 @@ ; CHECK-NEXT: [[A1:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 ; CHECK-NEXT: [[TMP:%.*]] = bitcast i32* [[A1]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 [[TMP]]) [[ATTR0]] +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 [[TMP]]) #[[ATTR0]] ; CHECK-NEXT: store i32 1, i32* [[A1]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i32** ; CHECK-NEXT: store i32* [[A1]], i32** [[TMP1]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -; CHECK-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* noundef nonnull [[GLOB2:@.*]], i32 [[TMP2]], i32 noundef 1, i64 noundef 8, i8* noundef nonnull align 8 [[TMP3]], void (i8*, i8*)* noundef nonnull @.omp.reduction.reduction_func, [8 x i32]* noundef nonnull @.gomp_critical_user_.reduction.var) +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* noundef nonnull @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 noundef 1, i64 noundef 8, i8* noundef nonnull align 8 [[TMP3]], void (i8*, i8*)* noundef nonnull @.omp.reduction.reduction_func, [8 x i32]* noundef nonnull @.gomp_critical_user_.reduction.var) ; CHECK-NEXT: switch i32 [[TMP4]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ ; CHECK-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] ; CHECK-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] @@ -345,7 +341,7 @@ ; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[A1]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]] ; CHECK-NEXT: store i32 [[ADD]], i32* [[A]], align 4 -; CHECK-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* noundef nonnull [[GLOB2]], i32 [[TMP2]], [8 x i32]* noundef nonnull @.gomp_critical_user_.reduction.var) +; CHECK-NEXT: call void @__kmpc_end_reduce_nowait(%struct.ident_t* noundef nonnull @[[GLOB2]], i32 [[TMP2]], [8 x i32]* noundef nonnull @.gomp_critical_user_.reduction.var) ; CHECK-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] ; CHECK: .omp.reduction.case2: ; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[A1]], align 4 @@ -393,7 +389,7 @@ define internal void @.omp.reduction.reduction_func(i8* %arg, i8* %arg1) { ; CHECK-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func -; CHECK-SAME: (i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG:%.*]], i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG1:%.*]]) [[ATTR9:#.*]] { +; CHECK-SAME: (i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG:%.*]], i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG1:%.*]]) #[[ATTR10:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP:%.*]] = bitcast i8* [[ARG1]] to i32** ; CHECK-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP]], align 8 @@ -439,6 +435,8 @@ declare void @readnone() readnone +!llvm.module.flags = !{!0, !8} + !0 = !{i32 1, !"wchar_size", i32 4} !1 = !{!"clang"} !2 = !{!3} @@ -447,3 +445,4 @@ !5 = !{!"int", !6, i64 0} !6 = !{!"omnipotent char", !7, i64 0} !7 = !{!"Simple C/C++ TBAA"} +!8 = !{i32 7, !"openmp", i32 50} diff --git a/llvm/test/Transforms/OpenMP/parallel_deletion_remarks.ll b/llvm/test/Transforms/OpenMP/parallel_deletion_remarks.ll --- a/llvm/test/Transforms/OpenMP/parallel_deletion_remarks.ll +++ b/llvm/test/Transforms/OpenMP/parallel_deletion_remarks.ll @@ -69,7 +69,7 @@ attributes #2 = { readnone willreturn } !llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!9, !10, !11, !12, !13} +!llvm.module.flags = !{!9, !10, !11, !12, !13, !52} !llvm.ident = !{!14} !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 10.0.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, splitDebugInlining: false, nameTableKind: None) @@ -124,3 +124,4 @@ !49 = !DILocalVariable(name: ".global_tid.", arg: 1, scope: !47, type: !28, flags: DIFlagArtificial) !50 = !DILocalVariable(name: ".bound_tid.", arg: 2, scope: !47, type: !28, flags: DIFlagArtificial) !51 = !DILocation(line: 15, column: 2, scope: !47) +!52 = !{i32 7, !"openmp", i32 50} diff --git a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll --- a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll +++ b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll @@ -781,12 +781,6 @@ ret void } - -!llvm.module.flags = !{!0} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{!2} -!2 = !{i64 2, i64 -1, i64 -1, i1 true} ; CHECK-LABEL: define {{[^@]+}}@merge ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: @@ -1897,3 +1891,10 @@ ; CHECK-NEXT: call void @use(i32 [[TMP0]]) ; CHECK-NEXT: ret void ; + +!llvm.module.flags = !{!0, !3} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!2} +!2 = !{i64 2, i64 -1, i64 -1, i1 true} +!3 = !{i32 7, !"openmp", i32 50} diff --git a/llvm/test/Transforms/OpenMP/rtf_type_checking.ll b/llvm/test/Transforms/OpenMP/rtf_type_checking.ll --- a/llvm/test/Transforms/OpenMP/rtf_type_checking.ll +++ b/llvm/test/Transforms/OpenMP/rtf_type_checking.ll @@ -49,13 +49,14 @@ ; Different return type. declare void @omp_get_thread_num() -!llvm.module.flags = !{!0} +!llvm.module.flags = !{!0, !4} !llvm.ident = !{!1} !0 = !{i32 1, !"wchar_size", i32 4} !1 = !{!"clang"} !2 = !{!3} !3 = !{i64 2, i64 -1, i64 -1, i1 true} +!4 = !{i32 7, !"openmp", i32 50} ; NPM: Running pass: OpenMPOptCGSCCPass on (.omp_outlined.) ; NPM-NOT: Running pass: OpenMPOptCGSCCPass on (.omp_outlined.) diff --git a/llvm/test/Transforms/OpenMP/single_threaded_execution.ll b/llvm/test/Transforms/OpenMP/single_threaded_execution.ll --- a/llvm/test/Transforms/OpenMP/single_threaded_execution.ll +++ b/llvm/test/Transforms/OpenMP/single_threaded_execution.ll @@ -35,10 +35,11 @@ declare dso_local i32 @omp_get_thread_num() -!llvm.module.flags = !{!0} +!llvm.module.flags = !{!0, !4} !llvm.ident = !{!1} !0 = !{i32 1, !"wchar_size", i32 4} !1 = !{!"clang version 13.0.0"} !2 = !{!3} !3 = !{i64 2, i64 -1, i64 -1, i1 true} +!4 = !{i32 7, !"openmp", i32 50} diff --git a/llvm/test/Transforms/OpenMP/values_in_offload_arrays.ll b/llvm/test/Transforms/OpenMP/values_in_offload_arrays.ll --- a/llvm/test/Transforms/OpenMP/values_in_offload_arrays.ll +++ b/llvm/test/Transforms/OpenMP/values_in_offload_arrays.ll @@ -70,3 +70,8 @@ declare void @__tgt_target_data_end_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**) declare dso_local i32 @rand(...) + +!llvm.module.flags = !{!0} + +!0 = !{i32 7, !"openmp", i32 50} +