Index: polly/trunk/include/polly/LinkAllPasses.h =================================================================== --- polly/trunk/include/polly/LinkAllPasses.h +++ polly/trunk/include/polly/LinkAllPasses.h @@ -16,6 +16,7 @@ #define POLLY_LINKALLPASSES_H #include "polly/Config/config.h" +#include "polly/PruneUnprofitable.h" #include "polly/Simplify.h" #include "polly/Support/DumpModulePass.h" #include "llvm/ADT/StringRef.h" @@ -89,6 +90,7 @@ polly::createDeLICMPass(); polly::createDumpModulePass("", true); polly::createSimplifyPass(); + polly::createPruneUnprofitablePass(); } } PollyForcePassLinking; // Force link by creating a global definition. } // namespace Index: polly/trunk/include/polly/PruneUnprofitable.h =================================================================== --- polly/trunk/include/polly/PruneUnprofitable.h +++ polly/trunk/include/polly/PruneUnprofitable.h @@ -0,0 +1,30 @@ +//===- PruneUnprofitable.h --------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Mark a SCoP as unfeasible if not deemed profitable to optimize. +// +//===----------------------------------------------------------------------===// + +#ifndef POLLY_ANALYSIS_PRUNEUNPROFITABLE_H +#define POLLY_ANALYSIS_PRUNEUNPROFITABLE_H + +namespace llvm { +class PassRegistry; +class Pass; +} // namespace llvm + +namespace polly { +llvm::Pass *createPruneUnprofitablePass(); +} // namespace polly + +namespace llvm { +void initializePruneUnprofitablePass(llvm::PassRegistry &); +} // namespace llvm + +#endif /* POLLY_ANALYSIS_PRUNEUNPROFITABLE_H */ Index: polly/trunk/include/polly/ScopInfo.h =================================================================== --- polly/trunk/include/polly/ScopInfo.h +++ polly/trunk/include/polly/ScopInfo.h @@ -2480,7 +2480,12 @@ void realignParams(); /// Return true if this SCoP can be profitably optimized. - bool isProfitable() const; + /// + /// @param ScalarsAreUnprofitable Never consider statements with scalar writes + /// as profitably optimizable. + /// + /// @return Whether this SCoP can be profitably optimized. + bool isProfitable(bool ScalarsAreUnprofitable) const; /// Return true if the SCoP contained at least one error block. bool hasErrorBlock() const { return HasErrorBlock; } Index: polly/trunk/lib/Analysis/PruneUnprofitable.cpp =================================================================== --- polly/trunk/lib/Analysis/PruneUnprofitable.cpp +++ polly/trunk/lib/Analysis/PruneUnprofitable.cpp @@ -0,0 +1,71 @@ +//===- PruneUnprofitable.cpp ------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Mark a SCoP as unfeasible if not deemed profitable to optimize. +// +//===----------------------------------------------------------------------===// + +#include "polly/PruneUnprofitable.h" +#include "polly/ScopInfo.h" +#include "polly/ScopPass.h" +#define DEBUG_TYPE "polly-prune-unprofitable" + +using namespace polly; +using namespace llvm; + +namespace { + +STATISTIC(ScopsProcessed, + "Number of SCoPs considered for unprofitability pruning"); +STATISTIC(ScopsPruned, "Number of pruned SCoPs because it they cannot be " + "optimized in a significant way"); + +class PruneUnprofitable : public ScopPass { +private: + PruneUnprofitable(const PruneUnprofitable &) = delete; + const PruneUnprofitable &operator=(const PruneUnprofitable &) = delete; + +public: + static char ID; + explicit PruneUnprofitable() : ScopPass(ID) {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.setPreservesAll(); + } + + virtual bool runOnScop(Scop &S) override { + if (PollyProcessUnprofitable) { + DEBUG(dbgs() << "NOTE: -polly-process-unprofitable active, won't prune " + "anything\n"); + return false; + } + + ScopsProcessed++; + + if (!S.isProfitable(true)) { + DEBUG(dbgs() << "SCoP pruned because it probably cannot be optimized in " + "a significant way\n"); + ScopsPruned++; + S.invalidate(PROFITABLE, DebugLoc()); + } + + return false; + } +}; + +char PruneUnprofitable::ID; +} // anonymous namespace + +Pass *polly::createPruneUnprofitablePass() { return new PruneUnprofitable(); } + +INITIALIZE_PASS_BEGIN(PruneUnprofitable, "polly-prune-unprofitable", + "Polly - Prune unprofitable SCoPs", false, false) +INITIALIZE_PASS_END(PruneUnprofitable, "polly-prune-unprofitable", + "Polly - Prune unprofitable SCoPs", false, false) Index: polly/trunk/lib/Analysis/ScopInfo.cpp =================================================================== --- polly/trunk/lib/Analysis/ScopInfo.cpp +++ polly/trunk/lib/Analysis/ScopInfo.cpp @@ -3387,7 +3387,7 @@ // Check early for profitability. Afterwards it cannot change anymore, // only the runtime context could become infeasible. - if (!isProfitable()) { + if (!isProfitable(UnprofitableScalarAccs)) { invalidate(PROFITABLE, DebugLoc()); return; } @@ -3901,7 +3901,7 @@ return isl_set_copy(AssumedContext); } -bool Scop::isProfitable() const { +bool Scop::isProfitable(bool ScalarsAreUnprofitable) const { if (PollyProcessUnprofitable) return true; @@ -3918,11 +3918,11 @@ for (auto *MA : Stmt) { if (MA->isRead()) continue; - ContainsArrayAccs |= MA->isArrayKind(); - ContainsScalarAccs |= MA->isScalarKind(); + ContainsArrayAccs |= MA->isLatestArrayKind(); + ContainsScalarAccs |= MA->isLatestScalarKind(); } - if (!UnprofitableScalarAccs || (ContainsArrayAccs && !ContainsScalarAccs)) + if (!ScalarsAreUnprofitable || (ContainsArrayAccs && !ContainsScalarAccs)) OptimizableStmtsOrLoops += Stmt.getNumIterators(); } Index: polly/trunk/lib/CMakeLists.txt =================================================================== --- polly/trunk/lib/CMakeLists.txt +++ polly/trunk/lib/CMakeLists.txt @@ -35,6 +35,7 @@ Analysis/ScopBuilder.cpp Analysis/ScopGraphPrinter.cpp Analysis/ScopPass.cpp + Analysis/PruneUnprofitable.cpp CodeGen/BlockGenerators.cpp ${ISL_CODEGEN_FILES} CodeGen/LoopGenerators.cpp Index: polly/trunk/lib/Support/RegisterPasses.cpp =================================================================== --- polly/trunk/lib/Support/RegisterPasses.cpp +++ polly/trunk/lib/Support/RegisterPasses.cpp @@ -194,6 +194,11 @@ cl::desc("Simplify SCoP after optimizations"), cl::init(false), cl::cat(PollyCategory)); +static cl::opt EnablePruneUnprofitable( + "polly-enable-prune-unprofitable", + cl::desc("Bail out on unprofitable SCoPs before rescheduling"), cl::Hidden, + cl::init(false), cl::cat(PollyCategory)); + namespace polly { void initializePollyPasses(PassRegistry &Registry) { initializeCodeGenerationPass(Registry); @@ -219,6 +224,7 @@ initializeDeLICMPass(Registry); initializeSimplifyPass(Registry); initializeDumpModulePass(Registry); + initializePruneUnprofitablePass(Registry); } /// Register Polly passes such that they form a polyhedral optimizer. @@ -282,6 +288,9 @@ if (DeadCodeElim) PM.add(polly::createDeadCodeElimPass()); + if (EnablePruneUnprofitable) + PM.add(polly::createPruneUnprofitablePass()); + if (Target == TARGET_GPU) { // GPU generation provides its own scheduling optimization strategy. } else { Index: polly/trunk/test/PruneUnprofitable/prune_only_scalardeps.ll =================================================================== --- polly/trunk/test/PruneUnprofitable/prune_only_scalardeps.ll +++ polly/trunk/test/PruneUnprofitable/prune_only_scalardeps.ll @@ -0,0 +1,56 @@ +; RUN: opt %loadPolly -polly-process-unprofitable=false -polly-unprofitable-scalar-accs=false -polly-prune-unprofitable -disable-output -stats < %s 2>&1 | FileCheck -match-full-lines %s +; REQUIRES: asserts +; +; Skip this SCoP for having scalar dependencies between all statements, +; but only after ScopInfo (because optimization passes using ScopInfo such +; as DeLICM might remove these scalar dependencies). +; +; double x = 0; +; for (int i = 0; i < n; i += 1) +; for (int j = 0; j < m; j += 1) { +; B[0] = x; +; x = A[0]; +; } +; return x; +; +define double @func(i32 %n, i32 %m, double* noalias nonnull %A, double* noalias nonnull %B) { +entry: + br label %outer.for + +outer.for: + %outer.phi = phi double [0.0, %entry], [%inner.phi, %outer.inc] + %i = phi i32 [0, %entry], [%i.inc, %outer.inc] + %i.cmp = icmp slt i32 %i, %n + br i1 %i.cmp, label %inner.for, label %outer.exit + + inner.for: + %inner.phi = phi double [%outer.phi, %outer.for], [%load, %inner.inc] + %j = phi i32 [0, %outer.for], [%j.inc, %inner.inc] + %j.cmp = icmp slt i32 %j, %m + br i1 %j.cmp, label %body, label %inner.exit + + body: + store double %inner.phi, double* %B + %load = load double, double* %A + br label %inner.inc + + inner.inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %inner.for + + inner.exit: + br label %outer.inc + +outer.inc: + %i.inc = add nuw nsw i32 %i, 1 + br label %outer.for + +outer.exit: + br label %return + +return: + ret double %outer.phi +} + + +; CHECK: 1 polly-prune-unprofitable - Number of pruned SCoPs because it they cannot be optimized in a significant way