diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -71,6 +71,7 @@ void initializeAliasSetPrinterPass(PassRegistry&); void initializeAlignmentFromAssumptionsPass(PassRegistry&); void initializeAlwaysInlinerLegacyPassPass(PassRegistry&); +void initializeOpenMPOptLegacyPassPass(PassRegistry &); void initializeArgPromotionPass(PassRegistry&); void initializeAssumptionCacheTrackerPass(PassRegistry&); void initializeAtomicExpandPass(PassRegistry&); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -71,6 +71,7 @@ (void) llvm::createAggressiveDCEPass(); (void) llvm::createAggressiveInstCombinerPass(); (void) llvm::createBitTrackingDCEPass(); + (void) llvm::createOpenMPOptLegacyPass(); (void) llvm::createArgumentPromotionPass(); (void) llvm::createAlignmentFromAssumptionsPass(); (void) llvm::createBasicAAWrapperPass(); diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h --- a/llvm/include/llvm/Transforms/IPO.h +++ b/llvm/include/llvm/Transforms/IPO.h @@ -150,6 +150,10 @@ /// Pass *createArgumentPromotionPass(unsigned maxElements = 3); +//===----------------------------------------------------------------------===// +/// createOpenMPOptLegacyPass - OpenMP specific optimizations. +Pass *createOpenMPOptLegacyPass(); + //===----------------------------------------------------------------------===// /// createIPConstantPropagationPass - This pass propagates constants from call /// sites into the bodies of functions. 
diff --git a/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h b/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h
@@ -0,0 +1,26 @@
+//===- IPO/OpenMPOpt.h - Collection of OpenMP optimizations -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_OPENMP_OPT_H
+#define LLVM_TRANSFORMS_IPO_OPENMP_OPT_H
+
+#include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+/// OpenMP optimizations pass.
+struct OpenMPOptPass : public PassInfoMixin<OpenMPOptPass> {
+  PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
+                        LazyCallGraph &CG, CGSCCUpdateResult &UR);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_IPO_OPENMP_OPT_H
diff --git a/llvm/lib/LTO/LTOCodeGenerator.cpp b/llvm/lib/LTO/LTOCodeGenerator.cpp
--- a/llvm/lib/LTO/LTOCodeGenerator.cpp
+++ b/llvm/lib/LTO/LTOCodeGenerator.cpp
@@ -133,6 +133,7 @@
   initializeSimpleInlinerPass(R);
   initializePruneEHPass(R);
   initializeGlobalDCELegacyPassPass(R);
+  initializeOpenMPOptLegacyPassPass(R);
   initializeArgPromotionPass(R);
   initializeJumpThreadingPass(R);
   initializeSROALegacyPassPass(R);
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -85,6 +85,7 @@
 #include "llvm/Transforms/IPO/Inliner.h"
 #include "llvm/Transforms/IPO/Internalize.h"
 #include "llvm/Transforms/IPO/LowerTypeTests.h"
+#include "llvm/Transforms/IPO/OpenMPOpt.h"
 #include "llvm/Transforms/IPO/PartialInlining.h"
 #include "llvm/Transforms/IPO/SCCP.h"
 #include "llvm/Transforms/IPO/SampleProfile.h"
@@ -816,6 +817,10 @@
   if (Level == O3)
     MainCGPipeline.addPass(ArgumentPromotionPass());
 
+  // Try to perform OpenMP specific optimizations. This is a no-op if there are
+  // no OpenMP runtime calls present in the module.
+  MainCGPipeline.addPass(OpenMPOptPass());
+
   // Lastly, add the core function simplification pipeline nested inside the
   // CGSCC walk.
   MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -108,6 +108,7 @@
 CGSCC_PASS("invalidate", InvalidateAllAnalysesPass())
 CGSCC_PASS("function-attrs", PostOrderFunctionAttrsPass())
 CGSCC_PASS("inline", InlinerPass())
+CGSCC_PASS("openmpopt", OpenMPOptPass())
 CGSCC_PASS("no-op-cgscc", NoOpCGSCCPass())
 #undef CGSCC_PASS
 
diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt
--- a/llvm/lib/Transforms/IPO/CMakeLists.txt
+++ b/llvm/lib/Transforms/IPO/CMakeLists.txt
@@ -26,6 +26,7 @@
   LoopExtractor.cpp
   LowerTypeTests.cpp
   MergeFunctions.cpp
+  OpenMPOpt.cpp
   PartialInlining.cpp
   PassManagerBuilder.cpp
   PruneEH.cpp
diff --git a/llvm/lib/Transforms/IPO/IPO.cpp b/llvm/lib/Transforms/IPO/IPO.cpp
--- a/llvm/lib/Transforms/IPO/IPO.cpp
+++ b/llvm/lib/Transforms/IPO/IPO.cpp
@@ -23,6 +23,7 @@
 using namespace llvm;
 
 void llvm::initializeIPO(PassRegistry &Registry) {
+  initializeOpenMPOptLegacyPassPass(Registry);
   initializeArgPromotionPass(Registry);
   initializeCalledValuePropagationLegacyPassPass(Registry);
   initializeConstantMergeLegacyPassPass(Registry);
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -0,0 +1,404 @@
+//===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// OpenMP specific optimizations:
+//
+// - Deduplication of runtime calls, e.g., omp_get_thread_num.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/OpenMPOpt.h"
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/OpenMPConstants.h"
+#include "llvm/Transforms/IPO.h"
+
+using namespace llvm;
+using namespace omp;
+using namespace types;
+
+#define DEBUG_TYPE "openmp-opt"
+
+static cl::opt<bool> DisableOpenMPOptimizations(
+    "openmp-opt-disable", cl::ZeroOrMore,
+    cl::desc("Disable OpenMP specific optimizations."), cl::Hidden,
+    cl::init(false));
+
+static constexpr auto TAG = "[OpenMPOpt] ";
+
+namespace {
+struct OpenMPOpt {
+
+  /// \p SCC must not be empty, the module is derived from its first function.
+  /// \p CGUpdater may be null if no call graph needs to be kept up to date.
+  OpenMPOpt(SmallPtrSetImpl<Function *> &SCC,
+            SmallPtrSetImpl<Function *> &ModuleSlice,
+            function_ref<void(CallBase &, CallBase *)> CGUpdater = nullptr)
+      : M(*(*SCC.begin())->getParent()), SCC(SCC), ModuleSlice(ModuleSlice),
+        CGUpdater(CGUpdater) {}
+
+  /// Generic information that describes a runtime function
+  struct RuntimeFunctionInfo {
+    /// The kind, as described by the RuntimeFunction enum.
+    RuntimeFunction Kind;
+
+    /// The name of the function.
+    StringRef Name;
+
+    /// Flag to indicate a variadic function.
+    bool IsVarArg;
+
+    /// The return type of the function.
+    Type *ReturnType;
+
+    /// The argument types of the function.
+    SmallVector<Type *, 8> ArgumentTypes;
+
+    /// The declaration if available.
+    Function *Declaration;
+
+    /// Uses of this runtime function per function containing the use.
+    DenseMap<Function *, SmallPtrSet<Use *, 16>> UsesMap;
+
+    /// Return the number of arguments (or the minimal number for variadic
+    /// functions).
+    size_t getNumArgs() const { return ArgumentTypes.size(); }
+
+    /// Run the callback \p CB on each use and forget the use if the result is
+    /// true. The callback will be fed the function in which the use was
+    /// encountered as second argument. Uses that are not part of a function
+    /// (collected under the nullptr key) are skipped as there is no function
+    /// that could be passed to \p CB.
+    void foreachUse(function_ref<bool(Use &, Function &)> CB) {
+      SmallVector<Use *, 8> ToBeDeleted;
+      for (auto &It : UsesMap) {
+        if (!It.first)
+          continue;
+        ToBeDeleted.clear();
+        for (Use *U : It.second)
+          if (CB(*U, *It.first))
+            ToBeDeleted.push_back(U);
+        for (Use *U : ToBeDeleted)
+          It.second.erase(U);
+      }
+    }
+  };
+
+  /// Initialize the type and runtime function information for the module and
+  /// run the optimizations on the SCC. Returns true if the IR was changed.
+  bool run() {
+    initializeTypes(M);
+    // Nothing to do if the module declares no OpenMP runtime function. Do not
+    // flip DisableOpenMPOptimizations here: the option is process-wide state
+    // and would also disable the pass for every module processed afterwards.
+    if (!initializeRuntimeFunctions(M))
+      return false;
+
+    bool Changed = false;
+    LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size()
+                      << " functions in a slice with " << ModuleSlice.size()
+                      << " functions\n");
+    Changed |= deduplicateRuntimeCalls();
+    return Changed;
+  }
+
+private:
+  /// Try to eliminate runtime calls by reusing existing ones.
+  bool deduplicateRuntimeCalls() {
+    bool Changed = false;
+
+    SmallSetVector<Value *, 16> GTIdArgs;
+    collectGlobalThreadIdArguments(GTIdArgs);
+    LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size()
+                      << " global thread ID arguments\n");
+
+    for (Function *F : SCC) {
+      Value *GTIdArg = nullptr;
+      llvm::any_of(F->args(), [&](Argument &Arg) {
+        return GTIdArg = GTIdArgs.count(&Arg) ? &Arg : nullptr;
+      });
+      Changed |= deduplicateRuntimeCalls(
+          *F, RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg);
+      Changed |= deduplicateRuntimeCalls(*F, RFIs[OMPRTL_omp_get_thread_num]);
+    }
+
+    return Changed;
+  }
+
+  /// Try to eliminate calls of \p RFI in \p F by reusing an existing one or
+  /// \p ReplVal if given.
+  bool deduplicateRuntimeCalls(Function &F, RuntimeFunctionInfo &RFI,
+                               Value *ReplVal = nullptr) {
+    auto &Uses = RFI.UsesMap[&F];
+    if (Uses.size() + (ReplVal != nullptr) < 2)
+      return false;
+
+    LLVM_DEBUG(dbgs() << TAG << "Deduplicate " << Uses.size() << " uses of "
+                      << RFI.Name
+                      << (ReplVal ? " with an existing value" : "")
+                      << "\n");
+    assert(!ReplVal || (isa<Argument>(ReplVal) &&
+                        cast<Argument>(ReplVal)->getParent() == &F) &&
+                           "Unexpected replacement value!");
+    if (!ReplVal) {
+      // A call moved to the start of the entry block cannot use a value that
+      // is defined by an instruction, so only hoist calls without such operands.
+      auto CanBeHoisted = [](CallInst &CI) {
+        return llvm::none_of(
+            CI.args(), [](Use &Arg) { return isa<Instruction>(Arg.get()); });
+      };
+      for (Use *U : Uses)
+        if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) {
+          if (!CanBeHoisted(*CI))
+            continue;
+          CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt());
+          ReplVal = CI;
+          break;
+        }
+      if (!ReplVal)
+        return false;
+    }
+
+    bool Changed = false;
+    auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) {
+      // foreachUse visits the uses in all functions of the slice but \p ReplVal
+      // is only valid in \p F, hence uses in other functions are left alone.
+      CallInst *CI = getCallIfRegularCall(U, &RFI);
+      if (!CI || CI == ReplVal || &F != &Caller)
+        return false;
+      assert(CI->getCaller() == &F && "Unexpected call!");
+      if (CGUpdater)
+        CGUpdater(*CI, nullptr);
+      CI->replaceAllUsesWith(ReplVal);
+      CI->eraseFromParent();
+      Changed = true;
+      return true;
+    };
+    RFI.foreachUse(ReplaceAndDeleteCB);
+
+    return Changed;
+  }
+
+  /// Collect arguments that represent the global thread id in \p GTIdArgs.
+  void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> &GTIdArgs) {
+    // TODO: Below we basically perform a fixpoint iteration with a pessimistic
+    //       initialization. We could define an AbstractAttribute instead and
+    //       run the Attributor here once it can be run as an SCC pass.
+
+    // Helper to check the argument \p ArgNo at all call sites of \p F for
+    // a GTId.
+    auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) {
+      if (!F.hasLocalLinkage())
+        return false;
+      for (Use &U : F.uses()) {
+        if (CallInst *CI = getCallIfRegularCall(U)) {
+          Value *ArgOp = CI->getArgOperand(ArgNo);
+          if (CI == &RefCI || GTIdArgs.count(ArgOp) ||
+              getCallIfRegularCall(*ArgOp,
+                                   &RFIs[OMPRTL___kmpc_global_thread_num]))
+            continue;
+        }
+        return false;
+      }
+      return true;
+    };
+
+    // Helper to identify uses of a GTId as GTId arguments.
+    auto AddUserArgs = [&](Value &GTId) {
+      for (Use &U : GTId.uses()) {
+        CallInst *CI = dyn_cast<CallInst>(U.getUser());
+        if (!CI || !CI->isArgOperand(&U))
+          continue;
+        Function *Callee = CI->getCalledFunction();
+        // Operands passed through the variadic part of a call have no formal
+        // argument in the callee that could be recorded.
+        if (!Callee || U.getOperandNo() >= Callee->arg_size())
+          continue;
+        if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI))
+          GTIdArgs.insert(Callee->getArg(U.getOperandNo()));
+      }
+    };
+
+    // The argument users of __kmpc_global_thread_num calls are GTIds.
+    RuntimeFunctionInfo &GlobThreadNumRFI =
+        RFIs[OMPRTL___kmpc_global_thread_num];
+    for (auto &It : GlobThreadNumRFI.UsesMap)
+      for (Use *U : It.second)
+        if (CallInst *CI = getCallIfRegularCall(*U, &GlobThreadNumRFI))
+          AddUserArgs(*CI);
+
+    // Transitively search for more arguments by looking at the users of the
+    // ones we know already.
+    for (unsigned u = 0; u < GTIdArgs.size(); ++u)
+      AddUserArgs(*GTIdArgs[u]);
+  }
+
+  /// Return the call if \p U is a callee use in a regular call. If \p RFI is
+  /// given it has to be the callee or a nullptr is returned.
+  CallInst *getCallIfRegularCall(Use &U, RuntimeFunctionInfo *RFI = nullptr) {
+    CallInst *CI = dyn_cast<CallInst>(U.getUser());
+    if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() &&
+        (!RFI || CI->getCalledFunction() == RFI->Declaration))
+      return CI;
+    return nullptr;
+  }
+
+  /// Return the call if \p V is a regular call. If \p RFI is given it has to be
+  /// the callee or a nullptr is returned.
+  CallInst *getCallIfRegularCall(Value &V, RuntimeFunctionInfo *RFI = nullptr) {
+    CallInst *CI = dyn_cast<CallInst>(&V);
+    if (CI && !CI->hasOperandBundles() &&
+        (!RFI || CI->getCalledFunction() == RFI->Declaration))
+      return CI;
+    return nullptr;
+  }
+
+  /// Helper to initialize all runtime function information for those defined in
+  /// OpenMPKinds.def.
+  bool initializeRuntimeFunctions(Module &M) {
+    bool FoundAny = false;
+
+    // Helper to collect all uses of the declaration in the UsesMap.
+    auto CollectUses = [&](RuntimeFunctionInfo &RFI) {
+      unsigned NumUses = 0;
+      if (!RFI.Declaration)
+        return NumUses;
+      for (Use &U : RFI.Declaration->uses()) {
+        if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) {
+          if (ModuleSlice.count(UserI->getFunction())) {
+            RFI.UsesMap[UserI->getFunction()].insert(&U);
+            ++NumUses;
+          }
+        } else {
+          RFI.UsesMap[nullptr].insert(&U);
+          ++NumUses;
+        }
+      }
+      return NumUses;
+    };
+
+#define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...)                     \
+  {                                                                            \
+    auto &RFI = RFIs[_Enum];                                                   \
+    RFI.Kind = _Enum;                                                          \
+    RFI.Name = _Name;                                                          \
+    RFI.IsVarArg = _IsVarArg;                                                  \
+    RFI.ReturnType = _ReturnType;                                              \
+    RFI.ArgumentTypes = SmallVector<Type *, 8>({__VA_ARGS__});                 \
+    RFI.Declaration = M.getFunction(_Name);                                    \
+    unsigned NumUses = CollectUses(RFI);                                       \
+    FoundAny |= (RFI.Declaration != nullptr);                                  \
+    (void)NumUses;                                                             \
+    LLVM_DEBUG({                                                               \
+      dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not")             \
+             << " found\n";                                                    \
+      if (RFI.Declaration)                                                     \
+        dbgs() << TAG << "-> got " << NumUses << " uses in "                   \
+               << RFI.UsesMap.size() << " different functions.\n";             \
+    });                                                                        \
+  }
+#include "llvm/IR/OpenMPKinds.def"
+    // TODO: We should validate the declaration against the types we expect.
+    return FoundAny;
+  }
+
+  /// The underlying module.
+  Module &M;
+
+  /// The SCC we are operating on.
+  SmallPtrSetImpl<Function *> &SCC;
+
+  /// The slice of the module we are allowed to look at.
+  SmallPtrSetImpl<Function *> &ModuleSlice;
+
+  /// Callback to update the call graph, the first argument is a removed call,
+  /// the second an optional replacement call.
+  function_ref<void(CallBase &, CallBase *)> CGUpdater;
+
+  /// Map from runtime function kind to the runtime function description.
+  std::map<RuntimeFunction, RuntimeFunctionInfo> RFIs;
+};
+} // namespace
+
+PreservedAnalyses OpenMPOptPass::run(LazyCallGraph::SCC &C,
+                                     CGSCCAnalysisManager &AM,
+                                     LazyCallGraph &CG, CGSCCUpdateResult &UR) {
+  if (DisableOpenMPOptimizations)
+    return PreservedAnalyses::all();
+
+  SmallPtrSet<Function *, 16> SCC;
+  for (LazyCallGraph::Node &N : C)
+    SCC.insert(&N.getFunction());
+  if (SCC.empty())
+    return PreservedAnalyses::all();
+
+  // TODO: Compute the module slice we are allowed to look at.
+  OpenMPOpt OMPOpt(SCC, SCC);
+  // Runtime calls may have been moved or erased, analyses are only preserved
+  // if the IR was left untouched.
+  if (OMPOpt.run())
+    return PreservedAnalyses::none();
+  return PreservedAnalyses::all();
+}
+
+namespace {
+
+struct OpenMPOptLegacyPass : public CallGraphSCCPass {
+  static char ID;
+  OpenMPOptLegacyPass() : CallGraphSCCPass(ID) {
+    initializeOpenMPOptLegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    CallGraphSCCPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnSCC(CallGraphSCC &SCC) override;
+};
+
+} // end anonymous namespace
+
+char OpenMPOptLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(OpenMPOptLegacyPass, "openmpopt",
+                      "OpenMP specific optimizations", false, false)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_END(OpenMPOptLegacyPass, "openmpopt",
+                    "OpenMP specific optimizations", false, false)
+
+Pass *llvm::createOpenMPOptLegacyPass() { return new OpenMPOptLegacyPass(); }
+
+bool OpenMPOptLegacyPass::runOnSCC(CallGraphSCC &CGSCC) {
+  if (DisableOpenMPOptimizations || skipSCC(CGSCC))
+    return false;
+
+  SmallPtrSet<Function *, 16> SCC;
+  for (CallGraphNode *CGN : CGSCC)
+    if (Function *Fn = CGN->getFunction())
+      if (!Fn->isDeclaration())
+        SCC.insert(Fn);
+
+  if (SCC.empty())
+    return false;
+
+  CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
+  auto CGUpdater = [&](CallBase &CB, CallBase *ReplCB) {
+    Function *Caller = CB.getCaller();
+    if (ReplCB)
+      CG[Caller]->replaceCallEdge(CB, *ReplCB, CG[ReplCB->getCalledFunction()]);
+    else
+      CG[Caller]->removeCallEdgeFor(CB);
+  };
+
+  // TODO: Compute the module slice we are allowed to look at.
+  OpenMPOpt OMPOpt(SCC, SCC, CGUpdater);
+  return OMPOpt.run();
+}
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -557,6 +557,10 @@
     RunInliner = true;
   }
 
+  // Try to perform OpenMP specific optimizations. This is a no-op if there are
+  // no OpenMP runtime calls present in the module.
+  MPM.add(createOpenMPOptLegacyPass());
+
   MPM.add(createPostOrderFunctionAttrsLegacyPass());
   if (OptLevel > 2)
     MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args
@@ -880,6 +884,10 @@
   // CSFDO instrumentation and use pass.
   addPGOInstrPasses(PM, /* IsCS */ true);
 
+  // Try to perform OpenMP specific optimizations. This is a no-op if there are
+  // no OpenMP runtime calls present in the module.
+  PM.add(createOpenMPOptLegacyPass());
+
   // Optimize globals again if we ran the inliner.
   if (RunInliner)
     PM.add(createGlobalOptimizerPass());
diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll
--- a/llvm/test/Other/new-pm-defaults.ll
+++ b/llvm/test/Other/new-pm-defaults.ll
@@ -127,6 +127,7 @@
 ; CHECK-O-NEXT: Running pass: InlinerPass
 ; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass
 ; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass
+; CHECK-O-NEXT: Running pass: OpenMPOptPass on (foo)
 ; CHECK-O-NEXT: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PassManager{{.*}}>
 ; CHECK-O-NEXT: Starting llvm::Function pass manager run.
; CHECK-O-NEXT: Running pass: SROA diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-defaults.ll @@ -107,6 +107,7 @@ ; CHECK-O-NEXT: Running pass: InlinerPass ; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass ; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass +; CHECK-O-NEXT: Running pass: OpenMPOptPass on (foo) ; CHECK-O-NEXT: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> ; CHECK-O-NEXT: Starting llvm::Function pass manager run. ; CHECK-O-NEXT: Running pass: SROA diff --git a/llvm/test/Other/opt-O2-pipeline.ll b/llvm/test/Other/opt-O2-pipeline.ll --- a/llvm/test/Other/opt-O2-pipeline.ll +++ b/llvm/test/Other/opt-O2-pipeline.ll @@ -57,6 +57,7 @@ ; CHECK-NEXT: Call Graph SCC Pass Manager ; CHECK-NEXT: Remove unused exception handling info ; CHECK-NEXT: Function Integration/Inlining +; CHECK-NEXT: OpenMP specific optimizations ; CHECK-NEXT: Deduce function attributes ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction diff --git a/llvm/test/Other/opt-O3-pipeline.ll b/llvm/test/Other/opt-O3-pipeline.ll --- a/llvm/test/Other/opt-O3-pipeline.ll +++ b/llvm/test/Other/opt-O3-pipeline.ll @@ -60,6 +60,7 @@ ; CHECK-NEXT: Call Graph SCC Pass Manager ; CHECK-NEXT: Remove unused exception handling info ; CHECK-NEXT: Function Integration/Inlining +; CHECK-NEXT: OpenMP specific optimizations ; CHECK-NEXT: Deduce function attributes ; CHECK-NEXT: Promote 'by reference' arguments to scalars ; CHECK-NEXT: FunctionPass Manager diff --git a/llvm/test/Other/opt-Os-pipeline.ll b/llvm/test/Other/opt-Os-pipeline.ll --- a/llvm/test/Other/opt-Os-pipeline.ll +++ b/llvm/test/Other/opt-Os-pipeline.ll @@ -57,6 +57,7 @@ ; CHECK-NEXT: Call Graph SCC Pass Manager ; CHECK-NEXT: Remove unused exception handling info ; CHECK-NEXT: Function Integration/Inlining +; CHECK-NEXT: OpenMP specific optimizations ; 
CHECK-NEXT: Deduce function attributes ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction diff --git a/llvm/test/Other/pass-pipelines.ll b/llvm/test/Other/pass-pipelines.ll --- a/llvm/test/Other/pass-pipelines.ll +++ b/llvm/test/Other/pass-pipelines.ll @@ -46,6 +46,7 @@ ; CHECK-O2-NEXT: Call Graph SCC Pass Manager ; CHECK-O2-NEXT: Remove unused exception handling info ; CHECK-O2-NEXT: Function Integration/Inlining +; CHECK-O2-NEXT: OpenMP specific optimizations ; CHECK-O2-NEXT: Deduce function attributes ; Next up is the main function pass pipeline. It shouldn't be split up and ; should contain the main loop pass pipeline as well. diff --git a/llvm/test/Transforms/OpenMP/gtid.ll b/llvm/test/Transforms/OpenMP/gtid.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/OpenMP/gtid.ll @@ -0,0 +1,86 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature +; RUN: opt -openmpopt -S < %s | FileCheck %s +; RUN: opt -passes=openmpopt -S < %s | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +%struct.ident_t = type { i32, i32, i32, i32, i8* } + +@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 34, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8 +@.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 + +declare i32 @__kmpc_global_thread_num(%struct.ident_t*) +declare void @useI32(i32) + +define void @external(i1 %c) { +; CHECK-LABEL: define {{[^@]+}}@external +; CHECK-SAME: (i1 [[C:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C2:%.*]] = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) +; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[E:%.*]] +; CHECK: t: +; CHECK-NEXT: call void @internal(i32 [[C2]], i32 [[C2]]) +; CHECK-NEXT: call void @useI32(i32 [[C2]]) +; CHECK-NEXT: br label [[M:%.*]] +; CHECK: e: +; 
CHECK-NEXT: call void @internal(i32 [[C2]], i32 [[C2]]) +; CHECK-NEXT: call void @useI32(i32 [[C2]]) +; CHECK-NEXT: br label [[M]] +; CHECK: m: +; CHECK-NEXT: call void @internal(i32 0, i32 [[C2]]) +; CHECK-NEXT: call void @useI32(i32 [[C2]]) +; CHECK-NEXT: ret void +; +entry: + br i1 %c, label %t, label %e +t: + %c0 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) + call void @internal(i32 %c0, i32 %c0) + call void @useI32(i32 %c0) + br label %m +e: + %c1 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) + call void @internal(i32 %c1, i32 %c1) + call void @useI32(i32 %c1) + br label %m +m: + %c2 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) + call void @internal(i32 0, i32 %c2) + call void @useI32(i32 %c2) + ret void +} + +define internal void @internal(i32 %not_gtid, i32 %gtid) { +; CHECK-LABEL: define {{[^@]+}}@internal +; CHECK-SAME: (i32 [[NOT_GTID:%.*]], i32 [[GTID:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[GTID]], [[GTID]] +; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[E:%.*]] +; CHECK: t: +; CHECK-NEXT: call void @useI32(i32 [[GTID]]) +; CHECK-NEXT: call void @external(i1 [[C]]) +; CHECK-NEXT: br label [[M:%.*]] +; CHECK: e: +; CHECK-NEXT: call void @useI32(i32 [[GTID]]) +; CHECK-NEXT: br label [[M]] +; CHECK: m: +; CHECK-NEXT: call void @useI32(i32 [[GTID]]) +; CHECK-NEXT: ret void +; +entry: + %cc = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) + %c = icmp eq i32 %cc, %gtid + br i1 %c, label %t, label %e +t: + %c0 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) + call void @useI32(i32 %c0) + call void @external(i1 %c) + br label %m +e: + %c1 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) + call void @useI32(i32 %c1) + br label %m +m: + %c2 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) + call void @useI32(i32 %c2) + ret void +}