diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -75,7 +75,7 @@ void initializeAssumeBuilderPassLegacyPassPass(PassRegistry &); void initializeAnnotation2MetadataLegacyPass(PassRegistry &); void initializeAnnotationRemarksLegacyPass(PassRegistry &); -void initializeOpenMPOptLegacyPassPass(PassRegistry &); +void initializeOpenMPOptCGSCCLegacyPassPass(PassRegistry &); void initializeArgPromotionPass(PassRegistry&); void initializeAssumptionCacheTrackerPass(PassRegistry&); void initializeAtomicExpandPass(PassRegistry&); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -71,7 +71,7 @@ (void) llvm::createAggressiveDCEPass(); (void) llvm::createAggressiveInstCombinerPass(); (void) llvm::createBitTrackingDCEPass(); - (void) llvm::createOpenMPOptLegacyPass(); + (void)llvm::createOpenMPOptCGSCCLegacyPass(); (void) llvm::createArgumentPromotionPass(); (void) llvm::createAlignmentFromAssumptionsPass(); (void) llvm::createBasicAAWrapperPass(); diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h --- a/llvm/include/llvm/Transforms/IPO.h +++ b/llvm/include/llvm/Transforms/IPO.h @@ -160,7 +160,7 @@ //===----------------------------------------------------------------------===// -/// createOpenMPOptLegacyPass - OpenMP specific optimizations. +/// createOpenMPOptCGSCCLegacyPass - OpenMP specific optimizations.
-Pass *createOpenMPOptLegacyPass(); +Pass *createOpenMPOptCGSCCLegacyPass(); //===----------------------------------------------------------------------===// /// createIPSCCPPass - This pass propagates constants from call sites into the diff --git a/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h b/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h --- a/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h +++ b/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h @@ -66,6 +66,14 @@ /// Helper to remember if the module contains OpenMP (runtime calls). omp::OpenMPInModule OMPInModule; +public: + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + +class OpenMPOptCGSCCPass : public PassInfoMixin<OpenMPOptCGSCCPass> { + /// Helper to remember if the module contains OpenMP (runtime calls). + omp::OpenMPInModule OMPInModule; + public: PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR); diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1010,7 +1010,7 @@ // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if // there are no OpenMP runtime calls present in the module. if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3) - MainCGPipeline.addPass(OpenMPOptPass()); + MainCGPipeline.addPass(OpenMPOptCGSCCPass()); for (auto &C : CGSCCOptimizerLateEPCallbacks) C(MainCGPipeline, Level); @@ -1108,6 +1108,11 @@ PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */)); } + // Try to perform OpenMP specific optimizations on the module. This is a + // (quick!) no-op if there are no OpenMP runtime calls present in the module.
+ if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3) + MPM.addPass(OpenMPOptPass()); + if (AttributorRun & AttributorRunOption::MODULE) MPM.addPass(AttributorPass()); diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -44,6 +44,7 @@ MODULE_PASS("always-inline", AlwaysInlinerPass()) MODULE_PASS("attributor", AttributorPass()) MODULE_PASS("annotation2metadata", Annotation2MetadataPass()) +MODULE_PASS("openmp-opt", OpenMPOptPass()) MODULE_PASS("called-value-propagation", CalledValuePropagationPass()) MODULE_PASS("canonicalize-aliases", CanonicalizeAliasesPass()) MODULE_PASS("cg-profile", CGProfilePass()) @@ -138,7 +139,7 @@ CGSCC_PASS("function-attrs", PostOrderFunctionAttrsPass()) CGSCC_PASS("attributor-cgscc", AttributorCGSCCPass()) CGSCC_PASS("inline", InlinerPass()) -CGSCC_PASS("openmpopt", OpenMPOptPass()) +CGSCC_PASS("openmp-opt-cgscc", OpenMPOptCGSCCPass()) CGSCC_PASS("coro-split", CoroSplitPass()) CGSCC_PASS("no-op-cgscc", NoOpCGSCCPass()) #undef CGSCC_PASS diff --git a/llvm/lib/Transforms/IPO/IPO.cpp b/llvm/lib/Transforms/IPO/IPO.cpp --- a/llvm/lib/Transforms/IPO/IPO.cpp +++ b/llvm/lib/Transforms/IPO/IPO.cpp @@ -23,7 +23,7 @@ using namespace llvm; void llvm::initializeIPO(PassRegistry &Registry) { - initializeOpenMPOptLegacyPassPass(Registry); + initializeOpenMPOptCGSCCLegacyPassPass(Registry); initializeArgPromotionPass(Registry); initializeAnnotation2MetadataLegacyPass(Registry); initializeCalledValuePropagationLegacyPassPass(Registry); diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -495,7 +495,7 @@ } /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice. 
- bool run() { + bool run(bool IsModulePass) { if (SCC.empty()) return false; @@ -505,28 +505,31 @@ << " functions in a slice with " << OMPInfoCache.ModuleSlice.size() << " functions\n"); - if (PrintICVValues) - printICVs(); - if (PrintOpenMPKernels) - printKernels(); - - Changed |= rewriteDeviceCodeStateMachine(); - - Changed |= runAttributor(); - - // Recollect uses, in case Attributor deleted any. - OMPInfoCache.recollectUses(); - - Changed |= deleteParallelRegions(); - if (HideMemoryTransferLatency) - Changed |= hideMemTransfersLatency(); - if (remarksEnabled()) - analysisGlobalization(); - Changed |= deduplicateRuntimeCalls(); - if (EnableParallelRegionMerging) { - if (mergeParallelRegions()) { - deduplicateRuntimeCalls(); - Changed = true; + if (IsModulePass) { + if (remarksEnabled()) + analysisGlobalization(); + } else { + if (PrintICVValues) + printICVs(); + if (PrintOpenMPKernels) + printKernels(); + + Changed |= rewriteDeviceCodeStateMachine(); + + Changed |= runAttributor(); + + // Recollect uses, in case Attributor deleted any. + OMPInfoCache.recollectUses(); + + Changed |= deleteParallelRegions(); + if (HideMemoryTransferLatency) + Changed |= hideMemTransfersLatency(); + Changed |= deduplicateRuntimeCalls(); + if (EnableParallelRegionMerging) { + if (mergeParallelRegions()) { + deduplicateRuntimeCalls(); + Changed = true; + } } } @@ -967,6 +970,7 @@ for (auto &MergableCIs : MergableCIsVector) Merge(MergableCIs, BB); + MergableCIsVector.clear(); } } @@ -2263,9 +2267,52 @@ return *AA; } -PreservedAnalyses OpenMPOptPass::run(LazyCallGraph::SCC &C, - CGSCCAnalysisManager &AM, - LazyCallGraph &CG, CGSCCUpdateResult &UR) { +PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) { + if (!containsOpenMP(M, OMPInModule)) + return PreservedAnalyses::all(); + + if (DisableOpenMPOptimizations) + return PreservedAnalyses::all(); + + // Look at every function definition in the Module. 
+ SmallVector<Function *, 16> SCC; + for (Function &Fn : M) + if (!Fn.isDeclaration()) + SCC.push_back(&Fn); + + if (SCC.empty()) + return PreservedAnalyses::all(); + + FunctionAnalysisManager &FAM = + AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + + AnalysisGetter AG(FAM); + + auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & { + return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F); + }; + + BumpPtrAllocator Allocator; + CallGraphUpdater CGUpdater; + + SetVector<Function *> Functions(SCC.begin(), SCC.end()); + OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions, + OMPInModule.getKernels()); + + Attributor A(Functions, InfoCache, CGUpdater); + + OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); + bool Changed = OMPOpt.run(true); + if (Changed) + return PreservedAnalyses::none(); + + return PreservedAnalyses::all(); +} + +PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C, + CGSCCAnalysisManager &AM, + LazyCallGraph &CG, + CGSCCUpdateResult &UR) { if (!containsOpenMP(*C.begin()->getFunction().getParent(), OMPInModule)) return PreservedAnalyses::all(); @@ -2299,33 +2346,32 @@ return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F); }; + BumpPtrAllocator Allocator; CallGraphUpdater CGUpdater; CGUpdater.initialize(CG, C, AM, UR); SetVector<Function *> Functions(SCC.begin(), SCC.end()); - BumpPtrAllocator Allocator; OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator, /*CGSCC*/ Functions, OMPInModule.getKernels()); Attributor A(Functions, InfoCache, CGUpdater); OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); - bool Changed = OMPOpt.run(); + bool Changed = OMPOpt.run(false); if (Changed) return PreservedAnalyses::none(); return PreservedAnalyses::all(); } - namespace { -struct OpenMPOptLegacyPass : public CallGraphSCCPass { +struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass { CallGraphUpdater CGUpdater; OpenMPInModule OMPInModule; static char ID; - OpenMPOptLegacyPass() : CallGraphSCCPass(ID) { - initializeOpenMPOptLegacyPassPass(*PassRegistry::getPassRegistry()); +
OpenMPOptCGSCCLegacyPass() : CallGraphSCCPass(ID) { + initializeOpenMPOptCGSCCLegacyPassPass(*PassRegistry::getPassRegistry()); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -2386,7 +2432,7 @@ Attributor A(Functions, InfoCache, CGUpdater); OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); - return OMPOpt.run(); + return OMPOpt.run(false); } bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); } @@ -2450,12 +2496,14 @@ return OMPInModule = false; } -char OpenMPOptLegacyPass::ID = 0; +char OpenMPOptCGSCCLegacyPass::ID = 0; -INITIALIZE_PASS_BEGIN(OpenMPOptLegacyPass, "openmpopt", +INITIALIZE_PASS_BEGIN(OpenMPOptCGSCCLegacyPass, "openmp-opt-cgscc", "OpenMP specific optimizations", false, false) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) -INITIALIZE_PASS_END(OpenMPOptLegacyPass, "openmpopt", +INITIALIZE_PASS_END(OpenMPOptCGSCCLegacyPass, "openmp-opt-cgscc", "OpenMP specific optimizations", false, false) -Pass *llvm::createOpenMPOptLegacyPass() { return new OpenMPOptLegacyPass(); } +Pass *llvm::createOpenMPOptCGSCCLegacyPass() { + return new OpenMPOptCGSCCLegacyPass(); +} diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -668,7 +668,7 @@ // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if // there are no OpenMP runtime calls present in the module. if (OptLevel > 1) - MPM.add(createOpenMPOptLegacyPass()); + MPM.add(createOpenMPOptCGSCCLegacyPass()); MPM.add(createPostOrderFunctionAttrsLegacyPass()); if (OptLevel > 2) @@ -1028,7 +1028,7 @@ // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if // there are no OpenMP runtime calls present in the module. if (OptLevel > 1) - PM.add(createOpenMPOptLegacyPass()); + PM.add(createOpenMPOptCGSCCLegacyPass()); // Optimize globals again if we ran the inliner. 
if (RunInliner) diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -89,6 +89,8 @@ ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Finished llvm::Function pass manager run. +; CHECK-O2-NEXT: Running pass: OpenMPOptPass +; CHECK-O3-NEXT: Running pass: OpenMPOptPass ; CHECK-EP-PIPELINE-EARLY-SIMPLIFICATION-NEXT: Running pass: NoOpModulePass ; CHECK-O-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass @@ -124,8 +126,8 @@ ; CHECK-O-NEXT: Running pass: InlinerPass ; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass ; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass -; CHECK-O2-NEXT: Running pass: OpenMPOptPass on (foo) -; CHECK-O3-NEXT: Running pass: OpenMPOptPass on (foo) +; CHECK-O2-NEXT: Running pass: OpenMPOptCGSCCPass on (foo) +; CHECK-O3-NEXT: Running pass: OpenMPOptCGSCCPass on (foo) ; CHECK-EP-CGSCC-LATE-NEXT: Running pass: NoOpCGSCCPass ; CHECK-O-NEXT: Starting llvm::Function pass manager run. ; CHECK-O-NEXT: Running pass: SROA diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-defaults.ll @@ -74,6 +74,8 @@ ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Finished llvm::Function pass manager run. 
+; CHECK-O2-NEXT: Running pass: OpenMPOptPass +; CHECK-O3-NEXT: Running pass: OpenMPOptPass ; CHECK-POSTLINK-O-NEXT: Running pass: LowerTypeTestsPass ; CHECK-O-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass @@ -108,8 +110,8 @@ ; CHECK-O-NEXT: Running pass: InlinerPass ; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass ; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass -; CHECK-O2-NEXT: Running pass: OpenMPOptPass on (foo) -; CHECK-O3-NEXT: Running pass: OpenMPOptPass on (foo) +; CHECK-O2-NEXT: Running pass: OpenMPOptCGSCCPass on (foo) +; CHECK-O3-NEXT: Running pass: OpenMPOptCGSCCPass on (foo) ; CHECK-O-NEXT: Starting llvm::Function pass manager run. ; CHECK-O-NEXT: Running pass: SROA ; CHECK-O-NEXT: Running pass: EarlyCSEPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -44,6 +44,8 @@ ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Finished {{.*}}Function pass manager run. +; CHECK-O2-NEXT: Running pass: OpenMPOptPass +; CHECK-O3-NEXT: Running pass: OpenMPOptPass ; CHECK-O-NEXT: Running pass: LowerTypeTestsPass ; CHECK-O-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass @@ -82,8 +84,8 @@ ; CHECK-O-NEXT: Running pass: InlinerPass ; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass ; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass -; CHECK-O2-NEXT: Running pass: OpenMPOptPass -; CHECK-O3-NEXT: Running pass: OpenMPOptPass +; CHECK-O2-NEXT: Running pass: OpenMPOptCGSCCPass +; CHECK-O3-NEXT: Running pass: OpenMPOptCGSCCPass ; CHECK-O-NEXT: Starting {{.*}}Function pass manager run. 
; CHECK-O-NEXT: Running pass: SROA ; CHECK-O-NEXT: Running pass: EarlyCSEPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -58,6 +58,8 @@ ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis ; CHECK-O-NEXT: Running pass: PGOIndirectCallPromotion +; CHECK-O2-NEXT: Running pass: OpenMPOptPass +; CHECK-O3-NEXT: Running pass: OpenMPOptPass ; CHECK-O-NEXT: Running pass: LowerTypeTestsPass ; CHECK-O-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass @@ -91,8 +93,8 @@ ; CHECK-O-NEXT: Running pass: InlinerPass ; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass ; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass -; CHECK-O2-NEXT: Running pass: OpenMPOptPass -; CHECK-O3-NEXT: Running pass: OpenMPOptPass +; CHECK-O2-NEXT: Running pass: OpenMPOptCGSCCPass +; CHECK-O3-NEXT: Running pass: OpenMPOptCGSCCPass ; CHECK-O-NEXT: Starting {{.*}}Function pass manager run. ; CHECK-O-NEXT: Running pass: SROA ; CHECK-O-NEXT: Running pass: EarlyCSEPass diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll @@ -45,6 +45,8 @@ ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Finished {{.*}}Function pass manager run. 
+; CHECK-O2-NEXT: Running pass: OpenMPOptPass +; CHECK-O3-NEXT: Running pass: OpenMPOptPass ; CHECK-O-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running pass: GlobalOptPass @@ -116,8 +118,8 @@ ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass ; CHECK-O3-NEXT: Running analysis: TargetIRAnalysis -; CHECK-O2-NEXT: Running pass: OpenMPOptPass -; CHECK-O3-NEXT: Running pass: OpenMPOptPass +; CHECK-O2-NEXT: Running pass: OpenMPOptCGSCCPass +; CHECK-O3-NEXT: Running pass: OpenMPOptCGSCCPass ; CHECK-O-NEXT: Starting {{.*}}Function pass manager run. ; CHECK-O-NEXT: Running pass: SROA ; CHECK-O-NEXT: Running pass: EarlyCSEPass diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll @@ -55,6 +55,8 @@ ; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis +; CHECK-O2-NEXT: Running pass: OpenMPOptPass +; CHECK-O3-NEXT: Running pass: OpenMPOptPass ; CHECK-O-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass ; CHECK-O-NEXT: Running pass: GlobalOptPass @@ -86,8 +88,8 @@ ; CHECK-O-NEXT: Running pass: InlinerPass ; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass ; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass -; CHECK-O2-NEXT: Running pass: OpenMPOptPass -; CHECK-O3-NEXT: Running pass: OpenMPOptPass +; CHECK-O2-NEXT: Running pass: OpenMPOptCGSCCPass +; CHECK-O3-NEXT: Running pass: OpenMPOptCGSCCPass ; CHECK-O-NEXT: Starting {{.*}}Function pass manager run. 
; CHECK-O-NEXT: Running pass: SROA ; CHECK-O-NEXT: Running pass: EarlyCSEPass diff --git a/llvm/test/Transforms/OpenMP/add_attributes.ll b/llvm/test/Transforms/OpenMP/add_attributes.ll --- a/llvm/test/Transforms/OpenMP/add_attributes.ll +++ b/llvm/test/Transforms/OpenMP/add_attributes.ll @@ -1,7 +1,7 @@ -; RUN: opt < %s -S -openmpopt | FileCheck %s -; RUN: opt < %s -S -passes=openmpopt | FileCheck %s -; RUN: opt < %s -S -openmpopt -openmp-ir-builder-optimistic-attributes | FileCheck %s --check-prefix=OPTIMISTIC -; RUN: opt < %s -S -passes=openmpopt -openmp-ir-builder-optimistic-attributes | FileCheck %s --check-prefix=OPTIMISTIC +; RUN: opt < %s -S -openmp-opt-cgscc | FileCheck %s +; RUN: opt < %s -S -passes=openmp-opt-cgscc | FileCheck %s +; RUN: opt < %s -S -openmp-opt-cgscc -openmp-ir-builder-optimistic-attributes | FileCheck %s --check-prefix=OPTIMISTIC +; RUN: opt < %s -S -passes=openmp-opt-cgscc -openmp-ir-builder-optimistic-attributes | FileCheck %s --check-prefix=OPTIMISTIC target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" %struct.omp_lock_t = type { i8* } diff --git a/llvm/test/Transforms/OpenMP/add_attributes_amdgcn.ll b/llvm/test/Transforms/OpenMP/add_attributes_amdgcn.ll --- a/llvm/test/Transforms/OpenMP/add_attributes_amdgcn.ll +++ b/llvm/test/Transforms/OpenMP/add_attributes_amdgcn.ll @@ -1,7 +1,7 @@ -; RUN: opt < %s -S -openmpopt | FileCheck %s -; RUN: opt < %s -S -passes=openmpopt | FileCheck %s -; RUN: opt < %s -S -openmpopt -openmp-ir-builder-optimistic-attributes | FileCheck %s --check-prefix=OPTIMISTIC -; RUN: opt < %s -S -passes=openmpopt -openmp-ir-builder-optimistic-attributes | FileCheck %s --check-prefix=OPTIMISTIC +; RUN: opt < %s -S -openmp-opt-cgscc | FileCheck %s +; RUN: opt < %s -S -passes=openmp-opt-cgscc | FileCheck %s +; RUN: opt < %s -S -openmp-opt-cgscc -openmp-ir-builder-optimistic-attributes | FileCheck %s --check-prefix=OPTIMISTIC +; RUN: opt < %s -S -passes=openmp-opt-cgscc 
-openmp-ir-builder-optimistic-attributes | FileCheck %s --check-prefix=OPTIMISTIC target triple = "amdgcn-amd-amdhsa" diff --git a/llvm/test/Transforms/OpenMP/attributor_module_slice_reproducer.ll b/llvm/test/Transforms/OpenMP/attributor_module_slice_reproducer.ll --- a/llvm/test/Transforms/OpenMP/attributor_module_slice_reproducer.ll +++ b/llvm/test/Transforms/OpenMP/attributor_module_slice_reproducer.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature -; RUN: opt -S -openmpopt < %s | FileCheck %s -; RUN: opt -S -passes=openmpopt < %s | FileCheck %s +; RUN: opt -S -openmp-opt-cgscc < %s | FileCheck %s +; RUN: opt -S -passes=openmp-opt-cgscc < %s | FileCheck %s define internal fastcc void @"_omp$reduction$reduction_func14"() unnamed_addr { %call = call i8 @_ZStplIdESt7complexIT_ERKS2_S4_() diff --git a/llvm/test/Transforms/OpenMP/dead_use.ll b/llvm/test/Transforms/OpenMP/dead_use.ll --- a/llvm/test/Transforms/OpenMP/dead_use.ll +++ b/llvm/test/Transforms/OpenMP/dead_use.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature -; RUN: opt -S -openmpopt < %s | FileCheck %s -; RUN: opt -S -passes=openmpopt < %s | FileCheck %s +; RUN: opt -S -openmp-opt-cgscc < %s | FileCheck %s +; RUN: opt -S -passes=openmp-opt-cgscc < %s | FileCheck %s %struct.ident_t = type { i32, i32, i32, i32, i8* } @.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 diff --git a/llvm/test/Transforms/OpenMP/deduplication.ll b/llvm/test/Transforms/OpenMP/deduplication.ll --- a/llvm/test/Transforms/OpenMP/deduplication.ll +++ b/llvm/test/Transforms/OpenMP/deduplication.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes -; RUN: opt -openmpopt -S < %s | FileCheck %s -; RUN: opt -passes=openmpopt -S < %s | FileCheck %s +; RUN: opt 
-openmp-opt-cgscc -S < %s | FileCheck %s +; RUN: opt -passes=openmp-opt-cgscc -S < %s | FileCheck %s target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" %struct.ident_t = type { i32, i32, i32, i32, i8* } diff --git a/llvm/test/Transforms/OpenMP/deduplication_remarks.ll b/llvm/test/Transforms/OpenMP/deduplication_remarks.ll --- a/llvm/test/Transforms/OpenMP/deduplication_remarks.ll +++ b/llvm/test/Transforms/OpenMP/deduplication_remarks.ll @@ -1,5 +1,5 @@ -; RUN: opt -openmpopt -pass-remarks=openmp-opt -disable-output < %s 2>&1 | FileCheck %s -; RUN: opt -passes=openmpopt -pass-remarks=openmp-opt -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -openmp-opt-cgscc -pass-remarks=openmp-opt -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -passes=openmp-opt-cgscc -pass-remarks=openmp-opt -disable-output < %s 2>&1 | FileCheck %s ; ModuleID = 'deduplication_remarks.c' source_filename = "deduplication_remarks.c" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/OpenMP/globalization_remarks.ll b/llvm/test/Transforms/OpenMP/globalization_remarks.ll --- a/llvm/test/Transforms/OpenMP/globalization_remarks.ll +++ b/llvm/test/Transforms/OpenMP/globalization_remarks.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes=openmpopt -pass-remarks-analysis=openmp-opt -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -passes=openmp-opt -pass-remarks-analysis=openmp-opt -disable-output < %s 2>&1 | FileCheck %s ; ModuleID = 'declare_target_codegen_globalization.cpp' source_filename = "declare_target_codegen_globalization.cpp" target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" diff --git a/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll b/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll --- a/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll +++ b/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll @@ -1,5 +1,5 @@ -; RUN: 
opt -passes=openmpopt -pass-remarks=openmp-opt -openmp-print-gpu-kernels -disable-output < %s 2>&1 | FileCheck %s --implicit-check-not=non_kernel -; RUN: opt -openmpopt -pass-remarks=openmp-opt -openmp-print-gpu-kernels -disable-output < %s 2>&1 | FileCheck %s --implicit-check-not=non_kernel +; RUN: opt -passes=openmp-opt-cgscc -pass-remarks=openmp-opt -openmp-print-gpu-kernels -disable-output < %s 2>&1 | FileCheck %s --implicit-check-not=non_kernel +; RUN: opt -openmp-opt-cgscc -pass-remarks=openmp-opt -openmp-print-gpu-kernels -disable-output < %s 2>&1 | FileCheck %s --implicit-check-not=non_kernel ; CHECK-DAG: remark: <unknown>:0:0: OpenMP GPU kernel kernel1 ; CHECK-DAG: remark: <unknown>:0:0: OpenMP GPU kernel kernel2 diff --git a/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll b/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll --- a/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll +++ b/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll @@ -1,6 +1,6 @@ -; RUN: opt -S -passes=openmpopt -openmp-ir-builder-optimistic-attributes -pass-remarks=openmp-opt -openmp-print-gpu-kernels < %s | FileCheck %s -; RUN: opt -S -passes=openmpopt -pass-remarks=openmp-opt -openmp-print-gpu-kernels < %s | FileCheck %s -; RUN: opt -S -openmpopt -pass-remarks=openmp-opt -openmp-print-gpu-kernels < %s | FileCheck %s +; RUN: opt -S -passes=openmp-opt-cgscc -openmp-ir-builder-optimistic-attributes -pass-remarks=openmp-opt -openmp-print-gpu-kernels < %s | FileCheck %s +; RUN: opt -S -passes=openmp-opt-cgscc -pass-remarks=openmp-opt -openmp-print-gpu-kernels < %s | FileCheck %s +; RUN: opt -S -openmp-opt-cgscc -pass-remarks=openmp-opt -openmp-print-gpu-kernels < %s | FileCheck %s ; C input used for this test: diff --git a/llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll b/llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll --- a/llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll +++
b/llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature --scrub-attributes -; RUN: opt -S -passes=openmpopt -aa-pipeline=basic-aa -openmp-hide-memory-transfer-latency < %s | FileCheck %s +; RUN: opt -S -openmp-opt-cgscc -aa-pipeline=basic-aa -openmp-hide-memory-transfer-latency < %s | FileCheck %s +; RUN: opt -S -passes=openmp-opt-cgscc -aa-pipeline=basic-aa -openmp-hide-memory-transfer-latency < %s | FileCheck %s target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" ; CHECK: %struct.__tgt_async_info = type { i8* } diff --git a/llvm/test/Transforms/OpenMP/icv_remarks.ll b/llvm/test/Transforms/OpenMP/icv_remarks.ll --- a/llvm/test/Transforms/OpenMP/icv_remarks.ll +++ b/llvm/test/Transforms/OpenMP/icv_remarks.ll @@ -1,5 +1,5 @@ -; RUN: opt -passes=openmpopt -pass-remarks=openmp-opt -openmp-print-icv-values -disable-output < %s 2>&1 | FileCheck %s -; RUN: opt -openmpopt -pass-remarks=openmp-opt -openmp-print-icv-values -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -passes=openmp-opt-cgscc -pass-remarks=openmp-opt -openmp-print-icv-values -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -openmp-opt-cgscc -pass-remarks=openmp-opt -openmp-print-icv-values -disable-output < %s 2>&1 | FileCheck %s ; ModuleID = 'icv_remarks.c' source_filename = "icv_remarks.c" diff --git a/llvm/test/Transforms/OpenMP/icv_tracking.ll b/llvm/test/Transforms/OpenMP/icv_tracking.ll --- a/llvm/test/Transforms/OpenMP/icv_tracking.ll +++ b/llvm/test/Transforms/OpenMP/icv_tracking.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature -; RUN: opt -S -openmpopt < %s | FileCheck %s -; RUN: opt -S -passes=openmpopt < %s | FileCheck %s +; RUN: opt -S -openmp-opt-cgscc < %s | FileCheck %s +; RUN: opt -S -passes=openmp-opt-cgscc < %s | FileCheck %s 
%struct.ident_t = type { i32, i32, i32, i32, i8* } diff --git a/llvm/test/Transforms/OpenMP/parallel_deletion.ll b/llvm/test/Transforms/OpenMP/parallel_deletion.ll --- a/llvm/test/Transforms/OpenMP/parallel_deletion.ll +++ b/llvm/test/Transforms/OpenMP/parallel_deletion.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature -; RUN: opt -S -attributor -openmpopt < %s | FileCheck %s -; RUN: opt -S -passes='attributor,cgscc(openmpopt)' < %s | FileCheck %s +; RUN: opt -S -attributor -openmp-opt-cgscc < %s | FileCheck %s +; RUN: opt -S -passes='attributor,cgscc(openmp-opt-cgscc)' < %s | FileCheck %s ; target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/OpenMP/parallel_deletion_remarks.ll b/llvm/test/Transforms/OpenMP/parallel_deletion_remarks.ll --- a/llvm/test/Transforms/OpenMP/parallel_deletion_remarks.ll +++ b/llvm/test/Transforms/OpenMP/parallel_deletion_remarks.ll @@ -1,5 +1,5 @@ -; RUN: opt -S -pass-remarks=openmp-opt -attributor -openmpopt -disable-output < %s 2>&1 | FileCheck %s -; RUN: opt -S -pass-remarks=openmp-opt -passes='attributor,cgscc(openmpopt)' -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -S -pass-remarks=openmp-opt -attributor -openmp-opt-cgscc -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -S -pass-remarks=openmp-opt -passes='attributor,cgscc(openmp-opt-cgscc)' -disable-output < %s 2>&1 | FileCheck %s ; ModuleID = 'parallel_deletion_remarks.ll' source_filename = "parallel_deletion_remarks.c" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll --- a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll +++ b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs -; RUN: opt -S -attributor -openmpopt -openmp-opt-enable-merging -enable-new-pm=0 < %s | FileCheck %s -; RUN: opt -S -aa-pipeline= -passes='attributor,cgscc(openmpopt)' -openmp-opt-enable-merging < %s | FileCheck %s +; RUN: opt -S -attributor -openmp-opt-cgscc -openmp-opt-enable-merging -enable-new-pm=0 < %s | FileCheck %s +; RUN: opt -S -aa-pipeline= -passes='attributor,cgscc(openmp-opt-cgscc)' -openmp-opt-enable-merging < %s | FileCheck %s ; #include <omp.h> ; void foo(); ; void use(int); diff --git a/llvm/test/Transforms/OpenMP/rtf_type_checking.ll b/llvm/test/Transforms/OpenMP/rtf_type_checking.ll --- a/llvm/test/Transforms/OpenMP/rtf_type_checking.ll +++ b/llvm/test/Transforms/OpenMP/rtf_type_checking.ll @@ -1,7 +1,7 @@ -; RUN: opt -S -openmpopt -stats < %s 2>&1 -enable-new-pm=0 | FileCheck %s --check-prefixes=CHECK,LPM -; RUN: opt -S -passes='devirt<2>(cgscc(openmpopt))' -stats -debug-pass-manager < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,NPM -; RUN: opt -S -attributor -openmpopt -stats < %s 2>&1 -enable-new-pm=0 | FileCheck %s --check-prefixes=CHECK,LPM -; RUN: opt -S -passes='attributor,cgscc(devirt<2>(openmpopt))' -stats -debug-pass-manager < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,NPM +; RUN: opt -S -openmp-opt-cgscc -stats < %s 2>&1 -enable-new-pm=0 | FileCheck %s --check-prefixes=CHECK,LPM +; RUN: opt -S -passes='devirt<2>(cgscc(openmp-opt-cgscc))' -stats -debug-pass-manager < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,NPM +; RUN: opt -S -attributor -openmp-opt-cgscc -stats < %s 2>&1 -enable-new-pm=0 | FileCheck %s --check-prefixes=CHECK,LPM +; RUN: opt -S -passes='attributor,cgscc(devirt<2>(openmp-opt-cgscc))' -stats -debug-pass-manager < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,NPM ; REQUIRES: asserts target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" @@ -57,10 +57,10 @@ !2 = !{!3} !3 = !{i64 2, i64 -1, i64 -1, i1
true} -; NPM: Running pass: OpenMPOptPass on (.omp_outlined.) -; NPM-NOT: Running pass: OpenMPOptPass on (.omp_outlined.) -; NPM: Running pass: OpenMPOptPass on (main) -; NPM-NOT: Running pass: OpenMPOptPass on (main) +; NPM: Running pass: OpenMPOptCGSCCPass on (.omp_outlined.) +; NPM-NOT: Running pass: OpenMPOptCGSCCPass on (.omp_outlined.) +; NPM: Running pass: OpenMPOptCGSCCPass on (main) +; NPM-NOT: Running pass: OpenMPOptCGSCCPass on (main) ; ===-------------------------------------------------------------------------=== ; ... Statistics Collected ... ; ===-------------------------------------------------------------------------=== diff --git a/llvm/test/Transforms/OpenMP/values_in_offload_arrays.ll b/llvm/test/Transforms/OpenMP/values_in_offload_arrays.ll --- a/llvm/test/Transforms/OpenMP/values_in_offload_arrays.ll +++ b/llvm/test/Transforms/OpenMP/values_in_offload_arrays.ll @@ -1,4 +1,5 @@ -; RUN: opt -S -passes=openmpopt -aa-pipeline=basic-aa -openmp-hide-memory-transfer-latency -debug-only=openmp-opt < %s 2>&1 | FileCheck %s +; RUN: opt -S -openmp-opt-cgscc -aa-pipeline=basic-aa -openmp-hide-memory-transfer-latency -debug-only=openmp-opt < %s 2>&1 | FileCheck %s +; RUN: opt -S -passes=openmp-opt-cgscc -aa-pipeline=basic-aa -openmp-hide-memory-transfer-latency -debug-only=openmp-opt < %s 2>&1 | FileCheck %s ; REQUIRES: asserts target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/PhaseOrdering/openmp-opt-module.ll b/llvm/test/Transforms/PhaseOrdering/openmp-opt-module.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/openmp-opt-module.ll @@ -0,0 +1,46 @@ +; RUN: opt -O2 -pass-remarks-analysis=openmp-opt -enable-new-pm < %s 2>&1 | FileCheck %s --check-prefix=MODULE +target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" + +@.str = private unnamed_addr constant [13 x i8] c"Alloc Shared\00", align 1 + +; MODULE: remark: openmp_opt_module.c:5:7: 
Found thread data sharing on the GPU. Expect degraded performance due to data globalization. + +define void @foo() { +entry: + %x = call i8* @__kmpc_data_sharing_push_stack(i64 4, i16 0), !dbg !7 + %x_on_stack = bitcast i8* %x to i32* + %0 = bitcast i32* %x_on_stack to i8* + call void @use(i8* %0) + call void @__kmpc_data_sharing_pop_stack(i8* %x) + ret void +} + +define void @use(i8* %0) { +entry: + %.addr = alloca i8*, align 8 + store i8* %0, i8** %.addr, align 8 + ret void +} + +define internal i8* @__kmpc_data_sharing_push_stack(i64 %DataSize, i16 %shared) { +entry: + %call = call i8* @_Z10SafeMallocmPKc(i64 %DataSize, i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i64 0, i64 0)) #11 + ret i8* %call +} + +; Function Attrs: convergent nounwind mustprogress +declare i8* @_Z10SafeMallocmPKc(i64 %size, i8* nocapture readnone %msg) + +declare void @__kmpc_data_sharing_pop_stack(i8*) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "openmp_opt_module.c", directory: "/tmp/openmp_opt_module.c") +!2 = !{} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !6, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!6 = !DISubroutineType(types: !2) +!7 = !DILocation(line: 5, column: 7, scope: !5)