Index: include/polly/LinkAllPasses.h =================================================================== --- include/polly/LinkAllPasses.h +++ include/polly/LinkAllPasses.h @@ -16,6 +16,7 @@ #define POLLY_LINKALLPASSES_H #include "polly/Config/config.h" +#include "polly/Simplify.h" #include "polly/Support/DumpModulePass.h" #include "llvm/ADT/StringRef.h" #include @@ -87,6 +88,7 @@ polly::createFlattenSchedulePass(); polly::createDeLICMPass(); polly::createDumpModulePass("", true); + polly::createSimplifyPass(); } } PollyForcePassLinking; // Force link by creating a global definition. } // namespace Index: include/polly/ScopInfo.h =================================================================== --- include/polly/ScopInfo.h +++ include/polly/ScopInfo.h @@ -1381,6 +1381,11 @@ /// be eliminated too. void removeMemoryAccess(MemoryAccess *MA); + /// Remove @p MA from this statement. + /// + /// In contrast to removeMemoryAccess(), no other access will be eliminated. + void removeSingleMemoryAccess(MemoryAccess *MA); + typedef MemoryAccessVec::iterator iterator; typedef MemoryAccessVec::const_iterator const_iterator; Index: include/polly/Simplify.h =================================================================== --- /dev/null +++ include/polly/Simplify.h @@ -0,0 +1,30 @@ +//===------ Simplify.h ------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Simplify a SCoP by removing unnecessary statements and accesses. +// +//===----------------------------------------------------------------------===// + +#ifndef POLLY_TRANSFORM_SIMPLIFY_H +#define POLLY_TRANSFORM_SIMPLIFY_H + +namespace llvm { +class PassRegistry; +class Pass; +} // namespace llvm + +namespace polly { +llvm::Pass *createSimplifyPass(); +} // namespace polly + +namespace llvm { +void initializeSimplifyPass(llvm::PassRegistry &); +} // namespace llvm + +#endif /* POLLY_TRANSFORM_SIMPLIFY_H */ Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -1819,6 +1819,19 @@ InstructionToAccess.erase(MA->getAccessInstruction()); } +void ScopStmt::removeSingleMemoryAccess(MemoryAccess *MA) { + auto MAIt = std::find(MemAccs.begin(), MemAccs.end(), MA); + assert(MAIt != MemAccs.end()); + MemAccs.erase(MAIt); + + auto It = InstructionToAccess.find(MA->getAccessInstruction()); + if (It != InstructionToAccess.end()) { + It->second.remove(MA); + if (It->second.empty()) + InstructionToAccess.erase(MA->getAccessInstruction()); + } +} + //===----------------------------------------------------------------------===// /// Scop class implement Index: lib/CMakeLists.txt =================================================================== --- lib/CMakeLists.txt +++ lib/CMakeLists.txt @@ -60,6 +60,7 @@ Transform/FlattenSchedule.cpp Transform/FlattenAlgo.cpp Transform/DeLICM.cpp + Transform/Simplify.cpp ${POLLY_HEADER_FILES} ) Index: lib/Support/RegisterPasses.cpp =================================================================== --- lib/Support/RegisterPasses.cpp +++ lib/Support/RegisterPasses.cpp @@ -31,6 +31,7 @@ #include "polly/PolyhedralInfo.h" #include "polly/ScopDetection.h" #include "polly/ScopInfo.h" +#include "polly/Simplify.h" #include "polly/Support/DumpModulePass.h" #include "llvm/Analysis/CFGPrinter.h" #include "llvm/IR/LegacyPassManager.h" @@ -188,6 +189,11 @@ cl::desc("Eliminate scalar loop carried dependences"), cl::Hidden, cl::init(false), cl::cat(PollyCategory)); +static cl::opt + EnableSimplify("polly-enable-simplify", + cl::desc("Simplify SCoP after optimizations"), + cl::init(false), cl::cat(PollyCategory)); + namespace polly { void initializePollyPasses(PassRegistry &Registry) { initializeCodeGenerationPass(Registry); @@ -211,6 +217,7 @@ initializeCodegenCleanupPass(Registry); initializeFlattenSchedulePass(Registry); initializeDeLICMPass(Registry); + initializeSimplifyPass(Registry); initializeDumpModulePass(Registry); } @@ -264,12 +271,14 @@ if (EnablePolyhedralInfo) PM.add(polly::createPolyhedralInfoPass()); - if (EnableDeLICM) - PM.add(polly::createDeLICMPass()); - if (ImportJScop) PM.add(polly::createJSONImporterPass()); + if (EnableDeLICM) + PM.add(polly::createDeLICMPass()); + if (EnableSimplify) + PM.add(polly::createSimplifyPass()); + if (DeadCodeElim) PM.add(polly::createDeadCodeElimPass()); Index: lib/Transform/Simplify.cpp =================================================================== --- /dev/null +++ lib/Transform/Simplify.cpp @@ -0,0 +1,286 @@ +//===------ Simplify.cpp ----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Simplify a SCoP by removing unnecessary statements and accesses. +// +//===----------------------------------------------------------------------===// + +#include "polly/Simplify.h" +#include "polly/ScopInfo.h" +#include "polly/ScopPass.h" +#include "polly/Support/GICHelper.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Debug.h" +#define DEBUG_TYPE "polly-simplify" + +using namespace llvm; +using namespace polly; + +namespace { + +STATISTIC(ScopsProcessed, "Number of SCoPs processed"); +STATISTIC(ScopsModified, "Number of SCoPs simplified"); + +STATISTIC(PairUnequalAccRels, "Number of Load-Store pairs NOT removed because " + "of different access relations"); +STATISTIC(InBetweenStore, "Number of Load-Store pairs NOT removed because " + "there is another store between them"); +STATISTIC(TotalRedundantWritesRemoved, + "Number of writes of same value removed in any SCoP"); +STATISTIC(TotalStmtsRemoved, "Number of statements removed in any SCoP"); + +class Simplify : public ScopPass { +private: + /// The last/current SCoP that is/has been processed. + Scop *S; + + /// Number of redundant writes removed from this SCoP. + int RedundantWritesRemoved = 0; + + /// Number of unnecessary statements removed from the SCoP. + int StmtsRemoved = 0; + + /// Return whether at least one simplification has been applied. + bool isModified() const { + return RedundantWritesRemoved > 0 || StmtsRemoved > 0; + } + + MemoryAccess *getReadAccessForValue(ScopStmt *Stmt, llvm::Value *Val) { + if (!isa(Val)) + return nullptr; + + for (auto *MA : *Stmt) { + if (!MA->isRead()) + continue; + if (MA->getAccessValue() != Val) + continue; + + return MA; + } + + return nullptr; + } + + /// Return a write access that occurs between @p From and @p To. + /// + /// In region statements the order is ignored because we cannot predict it. + /// + /// @param Stmt Statement of both writes. + /// @param From Start looking after this access. + /// @param To Stop looking at this access, with the access itself. + /// @param Targets Look for an access that may wrote to one of these elements. + /// + /// @return A write access between @p From and @p To that writes to at least + /// one element in @p Targets. + MemoryAccess *hasWriteBetween(ScopStmt *Stmt, MemoryAccess *From, + MemoryAccess *To, IslPtr Targets) { + auto TargetsSpace = give(isl_map_get_space(Targets.keep())); + + bool Started = Stmt->isRegionStmt(); + for (auto *Acc : *Stmt) { + if (Acc->isLatestScalarKind()) + continue; + + if (Stmt->isBlockStmt() && From == Acc) { + assert(!Started); + Started = true; + continue; + } + if (Stmt->isBlockStmt() && To == Acc) { + assert(Started); + return nullptr; + } + if (!Started) + continue; + + if (!Acc->isWrite()) + continue; + + auto AccRel = give(Acc->getAccessRelation()); + auto AccRelSpace = give(isl_map_get_space(AccRel.keep())); + + // Spaces being different means that they access different arrays. + if (isl_space_has_equal_tuples(TargetsSpace.keep(), AccRelSpace.keep()) == + isl_bool_false) + continue; + + AccRel = give(isl_map_intersect_domain(AccRel.take(), + Acc->getStatement()->getDomain())); + AccRel = give(isl_map_intersect_params(AccRel.take(), S->getContext())); + auto CommonElt = give(isl_map_intersect(Targets.copy(), AccRel.copy())); + if (isl_map_is_empty(CommonElt.keep()) != isl_bool_true) + return Acc; + } + assert(Stmt->isRegionStmt() && + "To must be encountered in block statements"); + return nullptr; + } + + /// Remove writes that just write the same value already stored in the + /// element. + void removeRedundantWrites() { + // Delay actual removal to not invalidate iterators. + SmallVector StoresToRemove; + + for (auto &Stmt : *S) { + for (auto *WA : Stmt) { + if (!WA->isMustWrite()) + continue; + if (!WA->isLatestArrayKind()) + continue; + if (!isa(WA->getAccessInstruction())) + continue; + + auto ReadingValue = WA->getAccessValue(); + if (!ReadingValue) + continue; + + auto RA = getReadAccessForValue(&Stmt, ReadingValue); + if (!RA) + continue; + if (!RA->isLatestArrayKind()) + continue; + + auto WARel = give(WA->getLatestAccessRelation()); + WARel = give(isl_map_intersect_domain(WARel.take(), + WA->getStatement()->getDomain())); + WARel = give(isl_map_intersect_params(WARel.take(), S->getContext())); + auto RARel = give(RA->getLatestAccessRelation()); + RARel = give(isl_map_intersect_domain(RARel.take(), + RA->getStatement()->getDomain())); + RARel = give(isl_map_intersect_params(RARel.take(), S->getContext())); + + if (isl_map_is_equal(RARel.keep(), WARel.keep()) != isl_bool_true) { + PairUnequalAccRels++; + DEBUG(dbgs() << "Not cleaning up " << WA + << " because of unequal access relations:\n"); + DEBUG(dbgs() << " RA: " << RARel << "\n"); + DEBUG(dbgs() << " WA: " << WARel << "\n"); + continue; + } + + if (auto *Conflicting = hasWriteBetween(&Stmt, RA, WA, WARel)) { + InBetweenStore++; + DEBUG(dbgs() << "Not cleaning up " << WA + << " because there is another store to the same element " + "between\n"); + DEBUG(Conflicting->print(dbgs())); + continue; + } + + StoresToRemove.push_back(WA); + } + } + + for (auto *WA : StoresToRemove) { + auto Stmt = WA->getStatement(); + auto AccRel = give(WA->getAccessRelation()); + auto AccVal = WA->getAccessValue(); + + DEBUG(dbgs() << "Cleanup of " << WA << ":\n"); + DEBUG(dbgs() << " Scalar: " << *AccVal << "\n"); + DEBUG(dbgs() << " AccRel: " << AccRel << "\n"); + + Stmt->removeSingleMemoryAccess(WA); + + RedundantWritesRemoved++; + TotalRedundantWritesRemoved++; + } + } + + /// Remove statements without side effects. + void removeUnnecessayStmts() { + auto NumStmtsBefore = S->getSize(); + S->simplifySCoP(true); + assert(NumStmtsBefore >= S->getSize()); + StmtsRemoved = NumStmtsBefore - S->getSize(); + DEBUG(dbgs() << "Removed " << StmtsRemoved << " (of " << NumStmtsBefore + << ") statements\n"); + TotalStmtsRemoved += StmtsRemoved; + } + + /// Print simplification statistics to @p OS. + void printStatistics(llvm::raw_ostream &OS, int Indent = 0) const { + OS.indent(Indent) << "Statistics {\n"; + OS.indent(Indent + 4) << "Redundant writes removed: " + << RedundantWritesRemoved << "\n"; + OS.indent(Indent + 4) << "Stmts removed: " << StmtsRemoved << "\n"; + OS.indent(Indent) << "}\n"; + } + + /// Print the current state of all MemoryAccesses to @p OS. + void printAccesses(llvm::raw_ostream &OS, int Indent = 0) const { + OS.indent(Indent) << "After accesses {\n"; + for (auto &Stmt : *S) { + OS.indent(Indent + 4) << Stmt.getBaseName() << "\n"; + for (auto *MA : Stmt) + MA->print(OS); + } + OS.indent(Indent) << "}\n"; + } + +public: + static char ID; + explicit Simplify() : ScopPass(ID) {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequiredTransitive(); + AU.setPreservesAll(); + } + + virtual bool runOnScop(Scop &S) override { + // Reset statistics of last processed SCoP. + releaseMemory(); + + // Prepare processing of this SCoP. + this->S = &S; + ScopsProcessed++; + + DEBUG(dbgs() << "Removing redundant writes...\n"); + removeRedundantWrites(); + + DEBUG(dbgs() << "Removing statements without side effects...\n"); + removeUnnecessayStmts(); + + if (isModified()) + ScopsModified++; + DEBUG(dbgs() << "\nFinal Scop:\n"); + DEBUG(S.print(dbgs())); + + return false; + } + + virtual void printScop(raw_ostream &OS, Scop &S) const override { + assert(&S == this->S && + "Can only print analysis for the last processed SCoP"); + printStatistics(OS); + + if (!isModified()) { + OS << "SCoP could not be simplified\n"; + return; + } + printAccesses(OS); + } + + virtual void releaseMemory() override { + S = nullptr; + StmtsRemoved = 0; + } +}; + +char Simplify::ID; +} // anonymous namespace + +Pass *polly::createSimplifyPass() { return new Simplify(); } + +INITIALIZE_PASS_BEGIN(Simplify, "polly-simplify", "Polly - Simplify", false, + false) +INITIALIZE_PASS_END(Simplify, "polly-simplify", "Polly - Simplify", false, + false) Index: test/Simplify/pass_existence.ll =================================================================== --- /dev/null +++ test/Simplify/pass_existence.ll @@ -0,0 +1,33 @@ +; RUN: opt %loadPolly -polly-simplify -analyze < %s | FileCheck %s +; +; Simple test for the existence of the Simplify pass. +; +; for (int j = 0; j < n; j += 1) +; A[0] = 0.0; +; +define void @func(i32 %n, double* noalias nonnull %A) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %body, label %exit + + body: + store double 0.0, double* %A + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: SCoP could not be simplified Index: test/Simplify/redundant.ll =================================================================== --- /dev/null +++ test/Simplify/redundant.ll @@ -0,0 +1,41 @@ +; RUN: opt %loadPolly -polly-simplify -analyze < %s | FileCheck %s -match-full-lines +; +; Remove redundant store (a store that writes the same value already +; at the destination) +; +; for (int j = 0; j < n; j += 1) +; A[0] = A[0]; +; +define void @func(i32 %n, double* noalias nonnull %A) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %body, label %exit + + body: + %val = load double, double* %A + store double %val, double* %A + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: Statistics { +; CHECK: Redundant writes removed: 1 +; CHECK: Stmts removed: 1 +; CHECK: } + +; CHECK: After accesses { +; CHECK-NEXT: } Index: test/Simplify/redundant_differentindex.ll =================================================================== --- /dev/null +++ test/Simplify/redundant_differentindex.ll @@ -0,0 +1,40 @@ +; RUN: opt %loadPolly -polly-simplify -analyze < %s | FileCheck %s -match-full-lines +; RUN: opt %loadPolly -polly-simplify -disable-output -stats < %s 2>&1 | FileCheck %s --check-prefix=STATS -match-full-lines +; REQUIRES: asserts +; +; A store that has a different index than the load it is storing is +; not redundant. +; +; for (int j = 0; j < n; j += 1) +; A[0] = A[0]; +; +define void @func(i32 %n, double* noalias nonnull %A) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %body, label %exit + + body: + %val = load double, double* %A + %A_idx = getelementptr inbounds double, double* %A, i32 %j + store double %val, double* %A_idx + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: SCoP could not be simplified + +; STATS: 1 polly-simplify - Number of Load-Store pairs NOT removed because of different access relations Index: test/Simplify/redundant_storebetween.ll =================================================================== --- /dev/null +++ test/Simplify/redundant_storebetween.ll @@ -0,0 +1,41 @@ +; RUN: opt %loadPolly -polly-simplify -analyze < %s | FileCheck %s -match-full-lines +; RUN: opt %loadPolly -polly-simplify -disable-output -stats < %s 2>&1 | FileCheck %s --check-prefix=STATS -match-full-lines +; REQUIRES: asserts +; +; Don't remove store where there is another store to the same target +; in-between them. +; +; for (int j = 0; j < n; j += 1) +; A[0] = A[0]; +; +define void @func(i32 %n, double* noalias nonnull %A) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %body, label %exit + + body: + %A_idx = getelementptr inbounds double, double* %A, i32 %j + %val = load double, double* %A_idx + store double 0.0, double* %A + store double %val, double* %A_idx + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: SCoP could not be simplified + +; STATS: 1 polly-simplify - Number of Load-Store pairs NOT removed because there is another store between them