Index: include/llvm-c/Transforms/Scalar.h =================================================================== --- include/llvm-c/Transforms/Scalar.h +++ include/llvm-c/Transforms/Scalar.h @@ -41,6 +41,9 @@ /** See llvm::createAlignmentFromAssumptionsPass function. */ void LLVMAddAlignmentFromAssumptionsPass(LLVMPassManagerRef PM); +/** See llvm::createRemoveOpenMPRedundanciesPass function. */ +void LLVMAddRemoveOpenMPRedundanciesPass(LLVMPassManagerRef PM); + /** See llvm::createCFGSimplificationPass function. */ void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM); Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -299,6 +299,7 @@ void initializeRegionPrinterPass(PassRegistry&); void initializeRegionViewerPass(PassRegistry&); void initializeRegisterCoalescerPass(PassRegistry&); +void initializeRemoveOpenMPRedundanciesPass(PassRegistry&); void initializeStripGCRelocatesPass(PassRegistry&); void initializeRenameIndependentSubregsPass(PassRegistry&); void initializeResetMachineFunctionPass(PassRegistry &); Index: include/llvm/LinkAllPasses.h =================================================================== --- include/llvm/LinkAllPasses.h +++ include/llvm/LinkAllPasses.h @@ -148,6 +148,7 @@ (void) llvm::createRegionOnlyViewerPass(); (void) llvm::createRegionPrinterPass(); (void) llvm::createRegionViewerPass(); + (void) llvm::createRemoveOpenMPRedundanciesPass(); (void) llvm::createSCCPPass(); (void) llvm::createSafeStackPass(); (void) llvm::createSROAPass(); Index: include/llvm/Transforms/Scalar.h =================================================================== --- include/llvm/Transforms/Scalar.h +++ include/llvm/Transforms/Scalar.h @@ -45,6 +45,12 @@ //===----------------------------------------------------------------------===// // +// RemoveOpenMPRedundancies - Remove redundant OpenMP barrier & flush calls. 
+//
+FunctionPass *createRemoveOpenMPRedundanciesPass();
+
+//===----------------------------------------------------------------------===//
+//
 // SCCP - Sparse conditional constant propagation.
 //
 FunctionPass *createSCCPPass();
Index: include/llvm/Transforms/Scalar/RemoveOpenMPRedundancies.h
===================================================================
--- /dev/null
+++ include/llvm/Transforms/Scalar/RemoveOpenMPRedundancies.h
@@ -0,0 +1,37 @@
+//===---- RemoveOpenMPRedundancies.h ----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the pass that removes redundant OpenMP runtime library
+// calls. For instance, adjacent barrier/flush calls are not necessary since
+// they repeat the same operation.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_REMOVEOPENMPREDUNDANCIES_H
+#define LLVM_TRANSFORMS_SCALAR_REMOVEOPENMPREDUNDANCIES_H
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+/// Function pass (new pass manager) that removes redundant OpenMP barrier and
+/// flush runtime calls.
+struct RemoveOpenMPRedundanciesPass
+    : public PassInfoMixin<RemoveOpenMPRedundanciesPass> {
+  /// Runs the transformation on \p F for the new pass manager.
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+  // Glue for old PM.
+  /// Performs the removal on \p F; returns true if \p F was changed.
+  bool runImpl(Function &F);
+};
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_REMOVEOPENMPREDUNDANCIES_H
Index: lib/Passes/PassBuilder.cpp
===================================================================
--- lib/Passes/PassBuilder.cpp
+++ lib/Passes/PassBuilder.cpp
@@ -118,6 +118,7 @@
 #include "llvm/Transforms/Scalar/NewGVN.h"
 #include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h"
 #include "llvm/Transforms/Scalar/Reassociate.h"
+#include "llvm/Transforms/Scalar/RemoveOpenMPRedundancies.h"
 #include "llvm/Transforms/Scalar/SCCP.h"
 #include "llvm/Transforms/Scalar/SROA.h"
 #include "llvm/Transforms/Scalar/SimplifyCFG.h"
@@ -370,6 +371,9 @@
   FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass()));
 #endif
 
+  // Remove redundant OpenMP runtime calls.
+  FPM.addPass(RemoveOpenMPRedundanciesPass());
+
   // Finally, do an expensive DCE pass to catch all the dead code exposed by
   // the simplifications and basic cleanup after all the simplifications.
   FPM.addPass(ADCEPass());
Index: lib/Passes/PassRegistry.def
===================================================================
--- lib/Passes/PassRegistry.def
+++ lib/Passes/PassRegistry.def
@@ -184,6 +184,7 @@
 FUNCTION_PASS("print", RegionInfoPrinterPass(dbgs()))
 FUNCTION_PASS("print", ScalarEvolutionPrinterPass(dbgs()))
 FUNCTION_PASS("reassociate", ReassociatePass())
+FUNCTION_PASS("remove-openmp-redundancies", RemoveOpenMPRedundanciesPass())
 FUNCTION_PASS("sccp", SCCPPass())
 FUNCTION_PASS("simplify-cfg", SimplifyCFGPass())
 FUNCTION_PASS("sink", SinkingPass())
Index: lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- lib/Transforms/IPO/PassManagerBuilder.cpp
+++ lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -376,6 +376,8 @@
   if (LoadCombine)
     MPM.add(createLoadCombinePass());
 
+  MPM.add(createRemoveOpenMPRedundanciesPass()); // Remove redundant OpenMP runtime calls
+
   MPM.add(createAggressiveDCEPass());         // Delete dead instructions
   MPM.add(createCFGSimplificationPass());     //
Merge & remove BBs // Clean up after everything. Index: lib/Transforms/Scalar/CMakeLists.txt =================================================================== --- lib/Transforms/Scalar/CMakeLists.txt +++ lib/Transforms/Scalar/CMakeLists.txt @@ -47,6 +47,7 @@ PlaceSafepoints.cpp Reassociate.cpp Reg2Mem.cpp + RemoveOpenMPRedundancies.cpp RewriteStatepointsForGC.cpp SCCP.cpp SROA.cpp Index: lib/Transforms/Scalar/RemoveOpenMPRedundancies.cpp =================================================================== --- /dev/null +++ lib/Transforms/Scalar/RemoveOpenMPRedundancies.cpp @@ -0,0 +1,148 @@ +//===----------------------- RemoveOpenMPRedundancies.cpp -----------------===// +// Remove Redundant OpenMP Barrier & Flush Calls +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements redundant OpenMP runtime library call removal. For +// instance, adjacent barrier/flush calls are not necessary since they repeat +// the same operation. 
+//
+//===----------------------------------------------------------------------===//
+
+#define ROR_NAME "remove-openmp-redundancies"
+#define DEBUG_TYPE ROR_NAME
+#include "llvm/Transforms/Scalar/RemoveOpenMPRedundancies.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<bool>
+    EnableRTLOpts("optimize-openmp-rtl-calls", cl::init(true), cl::Hidden,
+                  cl::desc("Optimize OpenMP runtime-library calls"));
+
+STATISTIC(NumBarrierKilled, "Number of OpenMP barriers removed");
+STATISTIC(NumFlushKilled, "Number of OpenMP flushes removed");
+
+namespace {
+/// Legacy pass-manager wrapper; the transformation itself lives in Impl.
+struct RemoveOpenMPRedundancies : public FunctionPass {
+  static char ID; // Pass identification, replacement for typeid
+  RemoveOpenMPRedundancies() : FunctionPass(ID) {
+    initializeRemoveOpenMPRedundanciesPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    // NOTE(review): the analysis template arguments of these five calls are
+    // missing from this copy of the patch; restore them before building.
+    AU.addPreserved();
+    AU.addPreserved();
+    AU.addPreserved();
+    AU.addPreserved();
+    AU.addPreserved();
+  }
+
+  RemoveOpenMPRedundanciesPass Impl;
+};
+}
+
+char RemoveOpenMPRedundancies::ID = 0;
+INITIALIZE_PASS_BEGIN(RemoveOpenMPRedundancies, ROR_NAME,
+                      "Remove OpenMP redundancies", false, false)
+INITIALIZE_PASS_END(RemoveOpenMPRedundancies, ROR_NAME,
+                    "Remove OpenMP redundancies", false, false)
+
+/// Creates the legacy pass-manager version of the pass.
+FunctionPass *llvm::createRemoveOpenMPRedundanciesPass() {
+  return new RemoveOpenMPRedundancies();
+}
+
+bool RemoveOpenMPRedundancies::runOnFunction(Function &F) {
+  if (skipFunction(F))
+    return false;
+  return Impl.runImpl(F);
+}
+
+/// Erases every __kmpc_barrier that follows another barrier, and every
+/// __kmpc_flush that follows a barrier or flush, within one basic block when no
+/// side-effecting instruction separates them. Returns true if \p F changed.
+bool RemoveOpenMPRedundanciesPass::runImpl(Function &F) {
+  if (!EnableRTLOpts)
+    return false;
+
+  SmallVector<CallInst *, 8> CallsToDelete;
+
+  // Redundancy is tracked per basic block only.
+  for (auto &BB : F) {
+    CallInst *LastBarrier = nullptr, *LastFlush = nullptr;
+    for (auto &I : BB) {
+      if (auto *CI = dyn_cast<CallInst>(&I)) {
+        // getCalledFunction() is null for indirect calls; such calls must not
+        // be dereferenced and fall through to the side-effect check below.
+        const Function *Callee = CI->getCalledFunction();
+        if (Callee && Callee->getName() == "__kmpc_barrier") {
+          if (LastBarrier) {
+            ++NumBarrierKilled;
+            CallsToDelete.push_back(CI);
+          }
+          LastBarrier = CI;
+          continue;
+        } else if (Callee && Callee->getName() == "__kmpc_flush") {
+          // OpenMP barrier includes an implicit flush, so we
+          // also need to check last barrier here.
+          if (LastFlush || LastBarrier) {
+            ++NumFlushKilled;
+            CallsToDelete.push_back(CI);
+          }
+          LastFlush = CI;
+          continue;
+        }
+      }
+
+      // Anything else with side effects ends the redundancy window.
+      if (I.mayHaveSideEffects())
+        LastBarrier = LastFlush = nullptr;
+    }
+  }
+
+  for (auto *CI : CallsToDelete)
+    CI->eraseFromParent();
+  return !CallsToDelete.empty();
+}
+
+/// New pass-manager entry point: runs runImpl() and reports preserved analyses.
+PreservedAnalyses
+RemoveOpenMPRedundanciesPass::run(Function &F, FunctionAnalysisManager &AM) {
+  if (!runImpl(F))
+    return PreservedAnalyses::all();
+  // FIXME: Not sure what else to preserve
+  // NOTE(review): analysis template arguments below were lost in this copy.
+  PreservedAnalyses PA;
+  PA.preserveSet();
+  PA.preserve();
+  PA.preserve();
+  PA.preserve();
+  PA.preserve();
+  PA.preserve();
+  return PA;
+}
Index: lib/Transforms/Scalar/Scalar.cpp
===================================================================
--- lib/Transforms/Scalar/Scalar.cpp
+++ lib/Transforms/Scalar/Scalar.cpp
@@ -93,6 +93,7 @@
   initializeLoopLoadEliminationPass(Registry);
   initializeLoopSimplifyCFGLegacyPassPass(Registry);
   initializeLoopVersioningPassPass(Registry);
+  initializeRemoveOpenMPRedundanciesPass(Registry);
 }
 
 void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) {
@@ -111,6
+112,10 @@ unwrap(PM)->add(createAlignmentFromAssumptionsPass()); } +void LLVMAddRemoveOpenMPRedundanciesPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createRemoveOpenMPRedundanciesPass()); +} + void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createCFGSimplificationPass()); } Index: test/Transforms/RemoveOpenMPRedundancies/simple.ll =================================================================== --- /dev/null +++ test/Transforms/RemoveOpenMPRedundancies/simple.ll @@ -0,0 +1,130 @@ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +; RUN: opt < %s -remove-openmp-redundancies -S | FileCheck %s +; RUN: opt < %s -passes=remove-openmp-redundancies -S | FileCheck %s + +%ident_t = type { i32, i32, i32, i32, i8* } + +@.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 +@0 = private unnamed_addr constant %ident_t { i32 0, i32 66, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8 +@1 = private unnamed_addr constant %ident_t { i32 0, i32 34, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8 + +; A barrier after a barrier. The second barrier should be removed. +define i32 @foo(i32* nocapture %a) nounwind uwtable readonly { +entry: + %Val = add i32 0, 0 + call void @__kmpc_barrier(%ident_t* nonnull @0, i32 %Val) + %0 = load i32, i32* %a, align 4 + call void @__kmpc_barrier(%ident_t* nonnull @1, i32 %Val) + ret i32 %0 + +; CHECK-LABEL: @foo +; CHECK: call void @__kmpc_barrier(%ident_t* nonnull @0, i32 %Val) +; CHECK-NOT: call void @__kmpc_barrier(%ident_t* nonnull @1, i32 %Val) +; CHECK: ret i32 +} + +; A flush after a barrier. The flush should be removed. 
+define i32 @foo2(i32* nocapture %a) nounwind uwtable readonly { +entry: + %Val = add i32 0, 0 + call void @__kmpc_barrier(%ident_t* nonnull @0, i32 %Val) + %0 = load i32, i32* %a, align 4 + call void @__kmpc_flush(%ident_t* nonnull @1) + ret i32 %0 + +; CHECK-LABEL: @foo2 +; CHECK: call void @__kmpc_barrier(%ident_t* nonnull @0, i32 %Val) +; CHECK-NOT: call void @__kmpc_flush(%ident_t* nonnull @1) +; CHECK: ret i32 +} + +; A flush after a barrier. The flush should not be removed. +define i32 @foo2a(i32* nocapture %a) nounwind uwtable readonly { +entry: + %Val = add i32 0, 0 + call void @__kmpc_barrier(%ident_t* nonnull @0, i32 %Val) + %0 = load i32, i32* %a, align 4 + store i32 %0, i32* %a, align 4 + call void @__kmpc_flush(%ident_t* nonnull @1) + ret i32 %0 + +; CHECK-LABEL: @foo2a +; CHECK: call void @__kmpc_barrier(%ident_t* nonnull @0, i32 %Val) +; CHECK: call void @__kmpc_flush(%ident_t* nonnull @1) +; CHECK: ret i32 +} + +; A flush after a flush. The flush should be removed. +define i32 @foo3(i32* nocapture %a) nounwind uwtable readonly { +entry: + %Val = add i32 0, 0 + call void @__kmpc_flush(%ident_t* nonnull @0) + %0 = load i32, i32* %a, align 4 + call void @__kmpc_flush(%ident_t* nonnull @1) + ret i32 %0 + +; CHECK-LABEL: @foo3 +; CHECK: call void @__kmpc_flush(%ident_t* nonnull @0) +; CHECK-NOT: call void @__kmpc_flush(%ident_t* nonnull @1) +; CHECK: ret i32 +} + +; A flush after a flush. The flush should not be removed. +define i32 @foo3a(i32* nocapture %a) nounwind uwtable readonly { +entry: + %Val = add i32 0, 0 + %0 = load i32, i32* %a, align 4 + call void @__kmpc_flush(%ident_t* nonnull @0) + store i32 %0, i32* %a, align 4 + call void @__kmpc_flush(%ident_t* nonnull @1) + ret i32 %0 + +; CHECK-LABEL: @foo3a +; CHECK: call void @__kmpc_flush(%ident_t* nonnull @0) +; CHECK: call void @__kmpc_flush(%ident_t* nonnull @1) +; CHECK: ret i32 +} + +; A barrier after a flush. The barrier should not be removed. 
+; We are not removing this flush now since flush will ensure +; ordering for memory operations before and after it. +define i32 @foo4(i32* nocapture %a) nounwind uwtable readonly { +entry: + %Val = add i32 0, 0 + call void @__kmpc_flush(%ident_t* nonnull @1) + call void @__kmpc_barrier(%ident_t* nonnull @0, i32 %Val) + %0 = load i32, i32* %a, align 4 + ret i32 %0 + +; CHECK-LABEL: @foo4 +; CHECK: call void @__kmpc_flush(%ident_t* nonnull @1) +; CHECK: call void @__kmpc_barrier(%ident_t* nonnull @0, i32 %Val) +; CHECK: ret i32 +} + +; Complicated scenario +define i32 @foo5(i32* nocapture %a) nounwind uwtable readonly { +entry: + %Val = add i32 0, 0 + call void @__kmpc_flush(%ident_t* nonnull @1) + call void @__kmpc_barrier(%ident_t* nonnull @0, i32 %Val) + %0 = load i32, i32* %a, align 4 + call void @__kmpc_barrier(%ident_t* nonnull @0, i32 %Val) + %1 = load i32, i32* %a, align 4 + store i32 %1, i32* %a, align 4 + call void @__kmpc_flush(%ident_t* nonnull @1) + %2 = load i32, i32* %a, align 4 + call void @__kmpc_flush(%ident_t* nonnull @1) + ret i32 %2 + +; CHECK-LABEL: @foo5 +; CHECK: call void @__kmpc_flush(%ident_t* nonnull @1) +; CHECK: call void @__kmpc_barrier(%ident_t* nonnull @0, i32 %Val) +; CHECK: call void @__kmpc_flush(%ident_t* nonnull @1) +; CHECK-NOT: call void @__kmpc_flush(%ident_t* nonnull @1) +; CHECK: ret i32 +} + +declare void @__kmpc_barrier(%ident_t*, i32) local_unnamed_addr + +declare void @__kmpc_flush(%ident_t*) local_unnamed_addr