diff --git a/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h b/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h --- a/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h +++ b/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h @@ -20,60 +20,27 @@ /// Summary of a kernel (=entry point for target offloading). using Kernel = Function *; -/// Helper to remember if the module contains OpenMP (runtime calls), to be used -/// foremost with containsOpenMP. -struct OpenMPInModule { - OpenMPInModule &operator=(bool Found) { - if (Found) - Value = OpenMPInModule::OpenMP::FOUND; - else - Value = OpenMPInModule::OpenMP::NOT_FOUND; - return *this; - } - bool isKnown() { return Value != OpenMP::UNKNOWN; } - operator bool() { return Value != OpenMP::NOT_FOUND; } +/// Set of kernels in the module. +using KernelSet = SmallPtrSet; - /// Does this function \p F contain any OpenMP runtime calls? - bool containsOMPRuntimeCalls(Function *F) const { - return FuncsWithOMPRuntimeCalls.contains(F); - } +/// Helper to determine if \p M contains OpenMP. +bool containsOpenMP(Module &M); - /// Return the known kernels (=GPU entry points) in the module. - SmallPtrSetImpl &getKernels() { return Kernels; } +/// Helper to determine if \p M is an OpenMP target offloading device module. +bool isOpenMPDevice(Module &M); - /// Identify kernels in the module and populate the Kernels set. - void identifyKernels(Module &M); - -private: - enum class OpenMP { FOUND, NOT_FOUND, UNKNOWN } Value = OpenMP::UNKNOWN; - - friend bool containsOpenMP(Module &M, OpenMPInModule &OMPInModule); - - /// In which functions are OpenMP runtime calls present? - SmallPtrSet FuncsWithOMPRuntimeCalls; - - /// Collection of known kernels (=GPU entry points) in the module. - SmallPtrSet Kernels; -}; - -/// Helper to determine if \p M contains OpenMP (runtime calls). -bool containsOpenMP(Module &M, OpenMPInModule &OMPInModule); +/// Get OpenMP device kernels in \p M. 
+KernelSet getDeviceKernels(Module &M); } // namespace omp /// OpenMP optimizations pass. class OpenMPOptPass : public PassInfoMixin { - /// Helper to remember if the module contains OpenMP (runtime calls). - omp::OpenMPInModule OMPInModule; - public: PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; class OpenMPOptCGSCCPass : public PassInfoMixin { - /// Helper to remember if the module contains OpenMP (runtime calls). - omp::OpenMPInModule OMPInModule; - public: PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR); diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -1629,7 +1629,7 @@ for (auto *F : SCC) { if (!F->isDeclaration()) A.getOrCreateAAFor(IRPosition::function(*F)); - if (!OMPInfoCache.Kernels.empty()) + if (isOpenMPDevice(M)) A.getOrCreateAAFor(IRPosition::function(*F)); } } @@ -2629,17 +2629,18 @@ } PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) { - if (!containsOpenMP(M, OMPInModule)) + if (!containsOpenMP(M)) return PreservedAnalyses::all(); - if (DisableOpenMPOptimizations) return PreservedAnalyses::all(); + KernelSet Kernels = getDeviceKernels(M); + // Create internal copies of each function if this is a kernel Module. 
DenseSet InternalizedFuncs; - if (!OMPInModule.getKernels().empty()) + if (isOpenMPDevice(M)) for (Function &F : M) - if (!F.isDeclaration() && !OMPInModule.getKernels().contains(&F)) + if (!F.isDeclaration() && !Kernels.contains(&F)) if (Attributor::internalizeFunction(F, /* Force */ true)) InternalizedFuncs.insert(&F); @@ -2665,10 +2666,9 @@ CallGraphUpdater CGUpdater; SetVector Functions(SCC.begin(), SCC.end()); - OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions, - OMPInModule.getKernels()); + OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions, Kernels); - unsigned MaxFixponitIterations = (!OMPInModule.getKernels().empty()) ? 64 : 32; + unsigned MaxFixponitIterations = (isOpenMPDevice(M)) ? 64 : 32; Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false, MaxFixponitIterations, OREGetter, DEBUG_TYPE); @@ -2684,30 +2684,25 @@ CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR) { - if (!containsOpenMP(*C.begin()->getFunction().getParent(), OMPInModule)) + if (!containsOpenMP(*C.begin()->getFunction().getParent())) return PreservedAnalyses::all(); - if (DisableOpenMPOptimizations) return PreservedAnalyses::all(); SmallVector SCC; // If there are kernels in the module, we have to run on all SCC's. - bool SCCIsInteresting = !OMPInModule.getKernels().empty(); for (LazyCallGraph::Node &N : C) { Function *Fn = &N.getFunction(); SCC.push_back(Fn); - - // Do we already know that the SCC contains kernels, - // or that OpenMP functions are called from this SCC? - if (SCCIsInteresting) - continue; - // If not, let's check that. 
- SCCIsInteresting |= OMPInModule.containsOMPRuntimeCalls(Fn); } - if (!SCCIsInteresting || SCC.empty()) + if (SCC.empty()) return PreservedAnalyses::all(); + Module &M = *C.begin()->getFunction().getParent(); + + KernelSet Kernels = getDeviceKernels(M); + FunctionAnalysisManager &FAM = AM.getResult(C, CG).getManager(); @@ -2723,9 +2718,9 @@ SetVector Functions(SCC.begin(), SCC.end()); OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator, - /*CGSCC*/ Functions, OMPInModule.getKernels()); + /*CGSCC*/ Functions, Kernels); - unsigned MaxFixponitIterations = (!OMPInModule.getKernels().empty()) ? 64 : 32; + unsigned MaxFixponitIterations = (isOpenMPDevice(M)) ? 64 : 32; Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true, MaxFixponitIterations, OREGetter, DEBUG_TYPE); @@ -2741,7 +2736,6 @@ struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass { CallGraphUpdater CGUpdater; - OpenMPInModule OMPInModule; static char ID; OpenMPOptCGSCCLegacyPass() : CallGraphSCCPass(ID) { @@ -2752,38 +2746,27 @@ CallGraphSCCPass::getAnalysisUsage(AU); } - bool doInitialization(CallGraph &CG) override { - // Disable the pass if there is no OpenMP (runtime call) in the module. - containsOpenMP(CG.getModule(), OMPInModule); - return false; - } - bool runOnSCC(CallGraphSCC &CGSCC) override { - if (!containsOpenMP(CGSCC.getCallGraph().getModule(), OMPInModule)) + if (!containsOpenMP(CGSCC.getCallGraph().getModule())) return false; if (DisableOpenMPOptimizations || skipSCC(CGSCC)) return false; SmallVector SCC; // If there are kernels in the module, we have to run on all SCC's. - bool SCCIsInteresting = !OMPInModule.getKernels().empty(); for (CallGraphNode *CGN : CGSCC) { Function *Fn = CGN->getFunction(); if (!Fn || Fn->isDeclaration()) continue; SCC.push_back(Fn); - - // Do we already know that the SCC contains kernels, - // or that OpenMP functions are called from this SCC? - if (SCCIsInteresting) - continue; - // If not, let's check that. 
- SCCIsInteresting |= OMPInModule.containsOMPRuntimeCalls(Fn); } - if (!SCCIsInteresting || SCC.empty()) + if (SCC.empty()) return false; + Module &M = CGSCC.getCallGraph().getModule(); + KernelSet Kernels = getDeviceKernels(M); + CallGraph &CG = getAnalysis().getCallGraph(); CGUpdater.initialize(CG, CGSCC); @@ -2799,11 +2782,11 @@ AnalysisGetter AG; SetVector Functions(SCC.begin(), SCC.end()); BumpPtrAllocator Allocator; - OMPInformationCache InfoCache( - *(Functions.back()->getParent()), AG, Allocator, - /*CGSCC*/ Functions, OMPInModule.getKernels()); + OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, + Allocator, + /*CGSCC*/ Functions, Kernels); - unsigned MaxFixponitIterations = (!OMPInModule.getKernels().empty()) ? 64 : 32; + unsigned MaxFixponitIterations = (isOpenMPDevice(M)) ? 64 : 32; Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true, MaxFixponitIterations, OREGetter, DEBUG_TYPE); @@ -2816,11 +2799,13 @@ } // end anonymous namespace -void OpenMPInModule::identifyKernels(Module &M) { - +KernelSet llvm::omp::getDeviceKernels(Module &M) { + // TODO: Create a more cross-platform way of determining device kernels. NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations"); + KernelSet Kernels; + if (!MD) - return; + return Kernels; for (auto *Op : MD->operands()) { if (Op->getNumOperands() < 2) @@ -2838,38 +2823,24 @@ Kernels.insert(KernelFn); } -} -bool llvm::omp::containsOpenMP(Module &M, OpenMPInModule &OMPInModule) { - if (OMPInModule.isKnown()) - return OMPInModule; + return Kernels; +} - auto RecordFunctionsContainingUsesOf = [&](Function *F) { - for (User *U : F->users()) - if (auto *I = dyn_cast(U)) - OMPInModule.FuncsWithOMPRuntimeCalls.insert(I->getFunction()); - }; +bool llvm::omp::containsOpenMP(Module &M) { + Metadata *MD = M.getModuleFlag("openmp"); + if (!MD) + return false; - // MSVC doesn't like long if-else chains for some reason and instead just - // issues an error. Work around it.. 
- do { -#define OMP_RTL(_Enum, _Name, ...) \ - if (Function *F = M.getFunction(_Name)) { \ - RecordFunctionsContainingUsesOf(F); \ - OMPInModule = true; \ - } -#include "llvm/Frontend/OpenMP/OMPKinds.def" - } while (false); + return true; +} - // Identify kernels once. TODO: We should split the OMPInformationCache into a - // module and an SCC part. The kernel information, among other things, could - // go into the module part. - if (OMPInModule.isKnown() && OMPInModule) { - OMPInModule.identifyKernels(M); - return true; - } +bool llvm::omp::isOpenMPDevice(Module &M) { + Metadata *MD = M.getModuleFlag("openmp-device"); + if (!MD) + return false; - return OMPInModule = false; + return true; } char OpenMPOptCGSCCLegacyPass::ID = 0; diff --git a/llvm/test/Transforms/OpenMP/add_attributes.ll b/llvm/test/Transforms/OpenMP/add_attributes.ll --- a/llvm/test/Transforms/OpenMP/add_attributes.ll +++ b/llvm/test/Transforms/OpenMP/add_attributes.ll @@ -1739,3 +1739,6 @@ ; OPTIMISTIC: ; Function Attrs: convergent noinline nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_barrier_simple_spmd(%struct.ident_t* nocapture nofree readonly, i32) +!llvm.module.flags = !{!0} + +!0 = !{i32 7, !"openmp", i32 50} diff --git a/llvm/test/Transforms/OpenMP/add_attributes_amdgcn.ll b/llvm/test/Transforms/OpenMP/add_attributes_amdgcn.ll --- a/llvm/test/Transforms/OpenMP/add_attributes_amdgcn.ll +++ b/llvm/test/Transforms/OpenMP/add_attributes_amdgcn.ll @@ -26,3 +26,7 @@ ; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_syncwarp(i64) + +!llvm.module.flags = !{!0} + +!0 = !{i32 7, !"openmp", i32 50} diff --git a/llvm/test/Transforms/OpenMP/deduplication.ll b/llvm/test/Transforms/OpenMP/deduplication.ll --- a/llvm/test/Transforms/OpenMP/deduplication.ll +++ b/llvm/test/Transforms/OpenMP/deduplication.ll @@ -221,3 +221,7 @@ call void @useI32(i32 %tid5) ret void } + +!llvm.module.flags = !{!0} + +!0 = !{i32 7, !"openmp", i32 50} diff --git 
a/llvm/test/Transforms/OpenMP/deduplication_remarks.ll b/llvm/test/Transforms/OpenMP/deduplication_remarks.ll --- a/llvm/test/Transforms/OpenMP/deduplication_remarks.ll +++ b/llvm/test/Transforms/OpenMP/deduplication_remarks.ll @@ -30,7 +30,7 @@ declare void @llvm.dbg.value(metadata, metadata, metadata) !llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!8, !9, !10, !11, !12} +!llvm.module.flags = !{!8, !9, !10, !11, !12, !29} !llvm.ident = !{!13} !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 10.0.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, splitDebugInlining: false, nameTableKind: None) @@ -62,3 +62,4 @@ !26 = !DILocation(line: 9, column: 10, scope: !14) !27 = !DILocation(line: 10, column: 2, scope: !14) !28 = !DILocation(line: 13, column: 1, scope: !14) +!29 = !{i32 7, !"openmp", i32 50} diff --git a/llvm/test/Transforms/OpenMP/globalization_remarks.ll b/llvm/test/Transforms/OpenMP/globalization_remarks.ll --- a/llvm/test/Transforms/OpenMP/globalization_remarks.ll +++ b/llvm/test/Transforms/OpenMP/globalization_remarks.ll @@ -4,13 +4,14 @@ target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" target triple = "nvptx64" +; CHECK: remark: globalization_remarks.c:5:7: Could not move globalized variable to the stack. Variable is potentially captured. ; CHECK: remark: globalization_remarks.c:5:7: Found thread data sharing on the GPU. Expect degraded performance due to data globalization. 
@S = external local_unnamed_addr global i8* define void @foo() { entry: - %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !8 + %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !10 %x_on_stack = bitcast i8* %0 to i32* %1 = bitcast i32* %x_on_stack to i8* call void @share(i8* %1) @@ -30,13 +31,17 @@ !llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!3, !4} +!llvm.module.flags = !{!3, !4, !5, !6} +!nvvm.annotations = !{!7, !8} !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) !1 = !DIFile(filename: "globalization_remarks.c", directory: "/tmp/globalization_remarks.c") !2 = !{} !3 = !{i32 2, !"Debug Info Version", i32 3} !4 = !{i32 1, !"wchar_size", i32 4} -!6 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !7, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) -!7 = !DISubroutineType(types: !2) -!8 = !DILocation(line: 5, column: 7, scope: !6) +!5 = !{i32 7, !"openmp", i32 50} +!6 = !{i32 7, !"openmp-device", i32 50} +!7 = !{void ()* @foo, !"kernel", i32 1} +!8 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !9, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!9 = !DISubroutineType(types: !2) +!10 = !DILocation(line: 5, column: 7, scope: !8) diff --git a/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll b/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll --- a/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll +++ b/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll @@ -19,9 +19,11 @@ ; Needed to trigger the openmp-opt pass declare dso_local void @__kmpc_kernel_prepare_parallel(i8*) +!llvm.module.flags = !{!4} !nvvm.annotations = !{!2, !0, !1, !3, !1, !2} !0 = !{void ()* @kernel1, 
!"kernel", i32 1} !1 = !{void ()* @non_kernel, !"non_kernel", i32 1} !2 = !{null, !"align", i32 1} !3 = !{void ()* @kernel2, !"kernel", i32 1} +!4 = !{i32 7, !"openmp", i32 50} diff --git a/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll b/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll --- a/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll +++ b/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll @@ -275,5 +275,8 @@ !nvvm.annotations = !{!1} +!llvm.module.flags = !{!2, !3} !1 = !{void ()* @__omp_offloading_50_6dfa0f01_foo_l6, !"kernel", i32 1} +!2 = !{i32 7, !"openmp", i32 50} +!3 = !{i32 7, !"openmp-device", i32 50} diff --git a/llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll b/llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll --- a/llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll +++ b/llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll @@ -522,3 +522,7 @@ ; CHECK: declare void @__tgt_target_data_begin_mapper_issue(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**, %struct.__tgt_async_info*) ; CHECK: declare void @__tgt_target_data_begin_mapper_wait(i64, %struct.__tgt_async_info*) + +!llvm.module.flags = !{!0} + +!0 = !{i32 7, !"openmp", i32 50} diff --git a/llvm/test/Transforms/OpenMP/icv_remarks.ll b/llvm/test/Transforms/OpenMP/icv_remarks.ll --- a/llvm/test/Transforms/OpenMP/icv_remarks.ll +++ b/llvm/test/Transforms/OpenMP/icv_remarks.ll @@ -67,7 +67,7 @@ attributes #5 = { nounwind readnone speculatable willreturn } !llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!13, !14, !15} +!llvm.module.flags = !{!13, !14, !15, !59} !llvm.ident = !{!16} !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 11.0.0 (https://github.com/llvm/llvm-project.git 73cea83a6f5ab521edf3cccfc603534776d691ec)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, 
splitDebugInlining: false, nameTableKind: None) @@ -129,3 +129,4 @@ !56 = !DILocation(line: 18, column: 1, scope: !33) !57 = !{!58} !58 = !{i64 2, i64 -1, i64 -1, i1 true} +!59 = !{i32 7, !"openmp", i32 50} diff --git a/llvm/test/Transforms/OpenMP/icv_tracking.ll b/llvm/test/Transforms/OpenMP/icv_tracking.ll --- a/llvm/test/Transforms/OpenMP/icv_tracking.ll +++ b/llvm/test/Transforms/OpenMP/icv_tracking.ll @@ -675,5 +675,8 @@ declare i32 @__gxx_personality_v0(...) +!llvm.module.flags = !{!2} + !0 = !{!1} !1 = !{i64 2, i64 -1, i64 -1, i1 true} +!2 = !{i32 7, !"openmp", i32 50} diff --git a/llvm/test/Transforms/OpenMP/parallel_deletion.ll b/llvm/test/Transforms/OpenMP/parallel_deletion.ll --- a/llvm/test/Transforms/OpenMP/parallel_deletion.ll +++ b/llvm/test/Transforms/OpenMP/parallel_deletion.ll @@ -739,6 +739,8 @@ declare void @readnone() readnone +!llvm.module.flags = !{!8} + !0 = !{i32 1, !"wchar_size", i32 4} !1 = !{!"clang"} !2 = !{!3} @@ -747,3 +749,4 @@ !5 = !{!"int", !6, i64 0} !6 = !{!"omnipotent char", !7, i64 0} !7 = !{!"Simple C/C++ TBAA"} +!8 = !{i32 7, !"openmp", i32 50} diff --git a/llvm/test/Transforms/OpenMP/parallel_deletion_remarks.ll b/llvm/test/Transforms/OpenMP/parallel_deletion_remarks.ll --- a/llvm/test/Transforms/OpenMP/parallel_deletion_remarks.ll +++ b/llvm/test/Transforms/OpenMP/parallel_deletion_remarks.ll @@ -69,7 +69,7 @@ attributes #2 = { readnone willreturn } !llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!9, !10, !11, !12, !13} +!llvm.module.flags = !{!9, !10, !11, !12, !13, !52} !llvm.ident = !{!14} !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 10.0.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, splitDebugInlining: false, nameTableKind: None) @@ -124,3 +124,4 @@ !49 = !DILocalVariable(name: ".global_tid.", arg: 1, scope: !47, type: !28, flags: DIFlagArtificial) !50 = !DILocalVariable(name: ".bound_tid.", arg: 2, scope: !47, type: !28, flags: 
DIFlagArtificial) !51 = !DILocation(line: 15, column: 2, scope: !47) +!52 = !{i32 7, !"openmp", i32 50} diff --git a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll --- a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll +++ b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll @@ -782,11 +782,12 @@ } -!llvm.module.flags = !{!0} +!llvm.module.flags = !{!0, !3} !0 = !{i32 1, !"wchar_size", i32 4} !1 = !{!2} !2 = !{i64 2, i64 -1, i64 -1, i1 true} +!3 = !{i32 7, !"openmp", i32 50} ; CHECK-LABEL: define {{[^@]+}}@merge ; CHECK-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: @@ -6908,3 +6909,4 @@ ; CHECK2-NEXT: call void @use(i32 [[TMP0]]) ; CHECK2-NEXT: ret void ; + diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll --- a/llvm/test/Transforms/OpenMP/remove_globalization.ll +++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll @@ -30,7 +30,7 @@ ; CHECK-NEXT: ret void ; entry: - %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !9 + %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !11 call void @use(i8* %0) call void @__kmpc_free_shared(i8* %0) ret void @@ -46,7 +46,7 @@ ; CHECK-NEXT: ret void ; entry: - %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !10 + %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !12 call void @share(i8* %0) call void @__kmpc_free_shared(i8* %0) ret void @@ -76,7 +76,7 @@ declare void @__kmpc_free_shared(i8*) !llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!3, !4} +!llvm.module.flags = !{!3, !4, !6, !7} !nvvm.annotations = !{!5} !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 13.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) @@ -85,8 +85,10 @@ !3 = !{i32 2, !"Debug Info Version", i32 3} !4 = !{i32 1, !"wchar_size", i32 4} !5 = !{void ()* @kernel, !"kernel", i32 1} -!6 = 
distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) -!7 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) -!8 = !DISubroutineType(types: !2) -!9 = !DILocation(line: 2, column: 2, scope: !6) -!10 = !DILocation(line: 4, column: 2, scope: !7) +!6 = !{i32 7, !"openmp", i32 50} +!7 = !{i32 7, !"openmp-device", i32 50} +!8 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !10, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!9 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !10, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!10 = !DISubroutineType(types: !2) +!11 = !DILocation(line: 2, column: 2, scope: !8) +!12 = !DILocation(line: 4, column: 2, scope: !9) diff --git a/llvm/test/Transforms/OpenMP/replace_globalization.ll b/llvm/test/Transforms/OpenMP/replace_globalization.ll --- a/llvm/test/Transforms/OpenMP/replace_globalization.ll +++ b/llvm/test/Transforms/OpenMP/replace_globalization.ll @@ -35,7 +35,7 @@ %cmp = icmp eq i32 %tid, 0 br i1 %cmp, label %master, label %exit master: - %x = call i8* @__kmpc_alloc_shared(i64 16), !dbg !9 + %x = call i8* @__kmpc_alloc_shared(i64 16), !dbg !11 %x_on_stack = bitcast i8* %x to [4 x i32]* %0 = bitcast [4 x i32]* %x_on_stack to i8* call void @use(i8* %0) @@ -58,7 +58,7 @@ %3 = icmp eq i32 %tid, %master_tid br i1 %3, label %master, label %exit master: - %y = call i8* @__kmpc_alloc_shared(i64 4), !dbg !10 + %y = call i8* @__kmpc_alloc_shared(i64 4), !dbg !12 %y_on_stack = bitcast i8* %y to [4 x i32]* %4 = bitcast [4 x i32]* %y_on_stack to i8* call void @use(i8* 
%4) @@ -87,18 +87,19 @@ !llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!3, !4} -!nvvm.annotations = !{!5, !6} - +!llvm.module.flags = !{!3, !4, !5, !6} +!nvvm.annotations = !{!7, !8} !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) !1 = !DIFile(filename: "replace_globalization.c", directory: "/tmp/replace_globalization.c") !2 = !{} !3 = !{i32 2, !"Debug Info Version", i32 3} !4 = !{i32 1, !"wchar_size", i32 4} -!5 = !{void ()* @foo, !"kernel", i32 1} -!6 = !{void ()* @bar, !"kernel", i32 1} -!7 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) -!8 = !DISubroutineType(types: !2) -!9 = !DILocation(line: 5, column: 7, scope: !7) -!10 = !DILocation(line: 5, column: 14, scope: !7) +!5 = !{i32 7, !"openmp", i32 50} +!6 = !{i32 7, !"openmp-device", i32 50} +!7 = !{void ()* @foo, !"kernel", i32 1} +!8 = !{void ()* @bar, !"kernel", i32 1} +!9 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !10, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!10 = !DISubroutineType(types: !2) +!11 = !DILocation(line: 5, column: 7, scope: !9) +!12 = !DILocation(line: 5, column: 14, scope: !9) diff --git a/llvm/test/Transforms/OpenMP/rtf_type_checking.ll b/llvm/test/Transforms/OpenMP/rtf_type_checking.ll --- a/llvm/test/Transforms/OpenMP/rtf_type_checking.ll +++ b/llvm/test/Transforms/OpenMP/rtf_type_checking.ll @@ -49,13 +49,14 @@ ; Different return type. 
declare void @omp_get_thread_num() -!llvm.module.flags = !{!0} +!llvm.module.flags = !{!0, !4} !llvm.ident = !{!1} !0 = !{i32 1, !"wchar_size", i32 4} !1 = !{!"clang"} !2 = !{!3} !3 = !{i64 2, i64 -1, i64 -1, i1 true} +!4 = !{i32 7, !"openmp", i32 50} ; NPM: Running pass: OpenMPOptCGSCCPass on (.omp_outlined.) ; NPM-NOT: Running pass: OpenMPOptCGSCCPass on (.omp_outlined.) diff --git a/llvm/test/Transforms/OpenMP/single_threaded_execution.ll b/llvm/test/Transforms/OpenMP/single_threaded_execution.ll --- a/llvm/test/Transforms/OpenMP/single_threaded_execution.ll +++ b/llvm/test/Transforms/OpenMP/single_threaded_execution.ll @@ -68,8 +68,8 @@ declare void @__kmpc_kernel_init(i32, i16) !llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!3, !4} -!nvvm.annotations = !{!5} +!llvm.module.flags = !{!3, !4, !5, !6} +!nvvm.annotations = !{!7} !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) @@ -77,4 +77,6 @@ !2 = !{} !3 = !{i32 2, !"Debug Info Version", i32 3} !4 = !{i32 1, !"wchar_size", i32 4} -!5 = !{void ()* @kernel, !"kernel", i32 1} +!5 = !{i32 7, !"openmp", i32 50} +!6 = !{i32 7, !"openmp-device", i32 50} +!7 = !{void ()* @kernel, !"kernel", i32 1} diff --git a/llvm/test/Transforms/OpenMP/values_in_offload_arrays.ll b/llvm/test/Transforms/OpenMP/values_in_offload_arrays.ll --- a/llvm/test/Transforms/OpenMP/values_in_offload_arrays.ll +++ b/llvm/test/Transforms/OpenMP/values_in_offload_arrays.ll @@ -70,3 +70,8 @@ declare void @__tgt_target_data_end_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**) declare dso_local i32 @rand(...) 
+ +!llvm.module.flags = !{!0} + +!0 = !{i32 7, !"openmp", i32 50} + diff --git a/llvm/test/Transforms/PhaseOrdering/openmp-opt-module.ll b/llvm/test/Transforms/PhaseOrdering/openmp-opt-module.ll --- a/llvm/test/Transforms/PhaseOrdering/openmp-opt-module.ll +++ b/llvm/test/Transforms/PhaseOrdering/openmp-opt-module.ll @@ -9,7 +9,7 @@ define void @foo() { entry: - %x = call i8* @__kmpc_alloc_shared(i64 4), !dbg !7 + %x = call i8* @__kmpc_alloc_shared(i64 4), !dbg !10 %x_on_stack = bitcast i8* %x to i32* %0 = bitcast i32* %x_on_stack to i8* call void @use(i8* %0) @@ -36,13 +36,17 @@ declare void @__kmpc_free_shared(i8*) !llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!3, !4} +!llvm.module.flags = !{!3, !4, !5, !6} +!nvvm.annotations = !{!7} !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) !1 = !DIFile(filename: "openmp_opt_module.c", directory: "/tmp/openmp_opt_module.c") !2 = !{} !3 = !{i32 2, !"Debug Info Version", i32 3} !4 = !{i32 1, !"wchar_size", i32 4} -!5 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !6, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) -!6 = !DISubroutineType(types: !2) -!7 = !DILocation(line: 5, column: 7, scope: !5) +!5 = !{i32 7, !"openmp", i32 50} +!6 = !{i32 7, !"openmp-device", i32 50} +!7 = !{void ()* @foo, !"kernel", i32 1} +!8 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !9, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!9 = !DISubroutineType(types: !2) +!10 = !DILocation(line: 5, column: 7, scope: !8)