Index: include/polly/ScopBuilder.h
===================================================================
--- include/polly/ScopBuilder.h
+++ include/polly/ScopBuilder.h
@@ -229,6 +229,14 @@
   /// @returns True if the instruction should be modeled.
   bool shouldModelInst(Instruction *Inst, Loop *L);
 
+  /// Create one or more ScopStmts for @p BB in original instruction order,
+  /// splitting the block at "polly_split_after" metadata markers.
+  void buildSequentialBlockStmts(BasicBlock *BB);
+
+  /// Create ScopStmts for @p BB by partitioning its modeled instructions
+  /// into equivalence classes that share no scalar value.
+  void buildEqivClassBlockStmts(BasicBlock *BB);
+
   /// Create ScopStmt for all BBs and non-affine subregions of @p SR.
   ///
   /// @param SR A subregion of @p R.
Index: lib/Analysis/ScopBuilder.cpp
===================================================================
--- lib/Analysis/ScopBuilder.cpp
+++ lib/Analysis/ScopBuilder.cpp
@@ -25,6 +25,7 @@
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/EquivalenceClasses.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
@@ -102,6 +103,17 @@
     cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore,
     cl::init(false), cl::cat(PollyCategory));
 
+enum class GranularityChoice { BasicBlocks, ScalarIndepependence };
+
+static cl::opt<GranularityChoice> StmtGranularity(
+    "polly-stmt-granularity",
+    cl::desc("Select the statement granularity algorithm"),
+    cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb",
+                          "Entire basic blocks granularity"),
+               clEnumValN(GranularityChoice::ScalarIndepependence,
+                          "scalar-indep", "Scalar independence heuristic")),
+    cl::init(GranularityChoice::BasicBlocks), cl::cat(PollyCategory));
+
 void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI,
                                    Region *NonAffineSubRegion,
                                    bool IsExitBlock) {
@@ -664,6 +676,144 @@
          !canSynthesize(Inst, *scop, &SE, L);
 }
 
+static bool isOrderedInstruction(const Instruction &Inst) {
+  return Inst.mayHaveSideEffects() || Inst.mayReadOrWriteMemory();
+}
+
+void ScopBuilder::buildSequentialBlockStmts(BasicBlock *BB) {
+  Loop *SurroundingLoop = LI.getLoopFor(BB);
+
+  int Count = 0;
+  std::vector<Instruction *> Instructions;
+  for (Instruction &Inst : *BB) {
+    if (shouldModelInst(&Inst, SurroundingLoop))
+      Instructions.push_back(&Inst);
+    if (Inst.getMetadata("polly_split_after")) {
+      scop->addScopStmt(BB, SurroundingLoop, Instructions, Count);
+      Count++;
+      Instructions.clear();
+    }
+  }
+
+  scop->addScopStmt(BB, SurroundingLoop, Instructions, Count);
+}
+
+void ScopBuilder::buildEqivClassBlockStmts(BasicBlock *BB) {
+  Loop *L = LI.getLoopFor(BB);
+
+  EquivalenceClasses<Instruction *> UnionFind;
+  Instruction *PHIWrites = nullptr;
+  UnionFind.insert(PHIWrites);
+  for (Instruction &Inst : *BB) {
+    if (!shouldModelInst(&Inst, L))
+      continue;
+    UnionFind.insert(&Inst);
+  }
+
+  // Ensure that instructions using the same scalar will be in the same
+  // statement.
+  for (Instruction &Inst : *BB) {
+    if (UnionFind.findValue(&Inst) == UnionFind.end())
+      continue;
+
+    // Skip PHIs: their operands flow in over edges, not within this block.
+    if (isa<PHINode>(&Inst))
+      continue;
+
+    for (auto &Op : Inst.operands()) {
+      auto Def = dyn_cast<Instruction>(Op.get());
+      if (!Def)
+        continue;
+      if (Def->getParent() != BB)
+        continue;
+      if (UnionFind.findValue(Def) == UnionFind.end())
+        continue;
+      UnionFind.unionSets(&Inst, Def);
+    }
+  }
+
+  // Join the PHI write epilogue if necessary.
+  for (auto Succ : successors(BB)) {
+    for (auto &SuccInst : *Succ) {
+      auto SuccPHI = dyn_cast<PHINode>(&SuccInst);
+      if (!SuccPHI)
+        break;
+
+      auto IncomingVal = SuccPHI->getIncomingValueForBlock(BB);
+      auto IncomingInst = dyn_cast<Instruction>(IncomingVal);
+      if (!IncomingInst)
+        continue;
+      if (IncomingInst->getParent() != BB)
+        continue;
+      if (UnionFind.findValue(IncomingInst) == UnionFind.end())
+        continue;
+      UnionFind.unionSets(PHIWrites, IncomingInst);
+    }
+  }
+
+  // Ensure that the order of ordered instructions does not change.
+  SetVector<Instruction *> SeenLeaders; // TODO: Storing an iterator here
+                                        // would avoid the repeated
+                                        // getLeaderValue() lookups.
+  for (Instruction &Inst : *BB) {
+    if (!isOrderedInstruction(Inst))
+      continue;
+
+    auto LeaderIt = UnionFind.findLeader(&Inst);
+    if (LeaderIt == UnionFind.member_end())
+      continue;
+
+    auto Leader = *LeaderIt;
+    auto Inserted = SeenLeaders.insert(Leader);
+    if (Inserted)
+      continue;
+
+    for (auto Prev : reverse(SeenLeaders)) {
+      auto PrevLeader = UnionFind.getLeaderValue(Prev); // was back(): stopped too early
+      if (PrevLeader == Leader)
+        break;
+      UnionFind.unionSets(Prev, Leader);
+    }
+  }
+
+  SmallPtrSet<Instruction *, 8> ProcessedLeaders;
+  int Count = UnionFind.getNumClasses() > 1; // start at 1 iff the block splits
+  for (Instruction &Inst : *BB) {
+    auto LeaderIt = UnionFind.findLeader(&Inst);
+    assert((LeaderIt != UnionFind.member_end()) == shouldModelInst(&Inst, L));
+    if (LeaderIt == UnionFind.member_end())
+      continue;
+
+    auto Leader = *LeaderIt;
+    auto P = ProcessedLeaders.insert(Leader);
+    if (!P.second)
+      continue;
+
+    std::vector<Instruction *> Instructions;
+
+    // FIXME: Maybe we can avoid this nested loop; I am not sure whether
+    // UnionFind preserves the member's order.
+    for (Instruction &LeaderInst : *BB) {
+      auto LeaderInstLeaderIt = UnionFind.findLeader(&LeaderInst);
+      if (LeaderInstLeaderIt == UnionFind.member_end())
+        continue;
+
+      auto InstLeader = *LeaderInstLeaderIt;
+      if (InstLeader != Leader)
+        continue;
+
+      Instructions.push_back(&LeaderInst);
+    }
+
+    scop->addScopStmt(BB, L, Instructions, Count);
+    Count += 1;
+  }
+
+  auto EpiLeader = UnionFind.getLeaderValue(PHIWrites);
+  if (!ProcessedLeaders.count(EpiLeader))
+    scop->addScopStmt(BB, L, {}, Count);
+}
+
 void ScopBuilder::buildStmts(Region &SR) {
   if (scop->isNonAffineSubRegion(&SR)) {
     std::vector<Instruction *> Instructions;
@@ -680,23 +830,15 @@
     if (I->isSubRegion())
       buildStmts(*I->getNodeAs<Region>());
     else {
-      int Count = 0;
-      std::vector<Instruction *> Instructions;
-      for (Instruction &Inst : *I->getNodeAs<BasicBlock>()) {
-        Loop *L = LI.getLoopFor(Inst.getParent());
-        if (shouldModelInst(&Inst, L))
-          Instructions.push_back(&Inst);
-        if (Inst.getMetadata("polly_split_after")) {
-          Loop *SurroundingLoop = LI.getLoopFor(I->getNodeAs<BasicBlock>());
-          scop->addScopStmt(I->getNodeAs<BasicBlock>(), SurroundingLoop,
-                            Instructions, Count);
-          Count++;
-          Instructions.clear();
-        }
+      auto *BB = I->getNodeAs<BasicBlock>();
+      switch (StmtGranularity) {
+      case GranularityChoice::BasicBlocks:
+        buildSequentialBlockStmts(BB);
+        break;
+      case GranularityChoice::ScalarIndepependence:
+        buildEqivClassBlockStmts(BB);
+        break;
       }
-      Loop *SurroundingLoop = LI.getLoopFor(I->getNodeAs<BasicBlock>());
-      scop->addScopStmt(I->getNodeAs<BasicBlock>(), SurroundingLoop,
-                        Instructions, Count);
     }
   }
 
@@ -713,6 +855,43 @@
   if (isErrorBlock(BB, scop->getRegion(), LI, DT) && !IsExitBlock)
     return;
 
+  if (Stmt && Stmt->isBlockStmt() &&
+      (StmtGranularity == GranularityChoice::ScalarIndepependence)) {
+    // FIXME: Ugly workaround; Invariant loads should not be added to any
+    // statement in the first place.
+    if (Stmt == scop->getStmtListFor(&BB)[0]) {
+      for (auto &InvLoad : scop->getRequiredInvariantLoads()) {
+        if (InvLoad->getParent() != &BB)
+          continue;
+
+        // scop->getOrCreateScopArrayInfo(InvLoad->getPointerOperand(),
+        // InvLoad->getType(), { nullptr }, MemoryKind::Array);
+        auto Stmt = scop->getStmtListFor(&BB)[0];
+        buildMemoryAccess(MemAccInst(InvLoad), Stmt);
+      }
+    }
+
+    for (auto *Inst : Stmt->getInstructions()) {
+      PHINode *PHI = dyn_cast<PHINode>(Inst);
+      if (PHI)
+        buildPHIAccesses(Stmt, PHI, NonAffineSubRegion, IsExitBlock);
+      else
+        buildScalarDependences(Stmt, Inst);
+
+      if (auto MemInst = MemAccInst::dyn_cast(Inst)) {
+        assert(Stmt &&
+               "Cannot build access function in non-existing statement");
+        buildMemoryAccess(MemInst, Stmt);
+      }
+    }
+
+    for (Instruction &Inst : BB) {
+      buildEscapingDependences(&Inst);
+    }
+
+    return;
+  }
+
   int Count = 0;
   bool Split = false;
   for (Instruction &Inst : BB) {
@@ -1184,6 +1363,15 @@
   scop.reset(new Scop(R, SE, LI, DT, *SD.getDetectionContext(&R), ORE));
 
   buildStmts(R);
+
+  // auto &Inv = scop->getRequiredInvariantLoads();
+  // for (auto &InvLoad : Inv) {
+  //   scop->getOrCreateScopArrayInfo(InvLoad->getPointerOperand(),
+  //       InvLoad->getType(), {nullptr}, MemoryKind::Array);
+
+  //   auto Stmt = scop->getStmtListFor(InvLoad->getParent())[0];
+  //   buildMemoryAccess(MemAccInst(InvLoad), Stmt);
+  // }
   buildAccessFunctions();
 
   // In case the region does not have an exiting block we will later (during
Index: test/ScopInfo/granularity_scalar-indep.ll
===================================================================
--- /dev/null
+++ test/ScopInfo/granularity_scalar-indep.ll
@@ -0,0 +1,66 @@
+; RUN: opt %loadPolly -polly-stmt-granularity=scalar-indep -polly-print-instructions -polly-scops -analyze < %s | FileCheck %s -match-full-lines
+;
+; Split a block into two independent statements that share no scalar.
+;
+; for (int j = 0; j < n; j += 1) {
+; bodyA:
+;   double valA = A[0]; A[0] = valA;
+;
+; bodyB:
+;   double valB = 21.0 + 21.0; B[0] = valB;
+; }
+;
+define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) {
+entry:
+  br label %for
+
+for:
+  %j = phi i32 [0, %entry], [%j.inc, %inc]
+  %j.cmp = icmp slt i32 %j, %n
+  br i1 %j.cmp, label %body, label %exit
+
+body:
+  %valA = load double, double* %A
+  %valB = fadd double 21.0, 21.0
+  store double %valA, double* %A
+  store double %valB, double* %B
+  br label %inc
+
+inc:
+  %j.inc = add nuw nsw i32 %j, 1
+  br label %for
+
+exit:
+  br label %return
+
+return:
+  ret void
+}
+
+
+; CHECK: Statements {
+; CHECK-NEXT:     Stmt_body1
+; CHECK-NEXT:         Domain :=
+; CHECK-NEXT:             [n] -> { Stmt_body1[i0] : 0 <= i0 < n };
+; CHECK-NEXT:         Schedule :=
+; CHECK-NEXT:             [n] -> { Stmt_body1[i0] -> [i0, 0] };
+; CHECK-NEXT:         ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:             [n] -> { Stmt_body1[i0] -> MemRef_A[0] };
+; CHECK-NEXT:         MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:             [n] -> { Stmt_body1[i0] -> MemRef_A[0] };
+; CHECK-NEXT:         Instructions {
+; CHECK-NEXT:               %valA = load double, double* %A
+; CHECK-NEXT:               store double %valA, double* %A
+; CHECK-NEXT:         }
+; CHECK-NEXT:     Stmt_body2
+; CHECK-NEXT:         Domain :=
+; CHECK-NEXT:             [n] -> { Stmt_body2[i0] : 0 <= i0 < n };
+; CHECK-NEXT:         Schedule :=
+; CHECK-NEXT:             [n] -> { Stmt_body2[i0] -> [i0, 1] };
+; CHECK-NEXT:         MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:             [n] -> { Stmt_body2[i0] -> MemRef_B[0] };
+; CHECK-NEXT:         Instructions {
+; CHECK-NEXT:               %valB = fadd double 2.100000e+01, 2.100000e+01
+; CHECK-NEXT:               store double %valB, double* %B
+; CHECK-NEXT:         }
+; CHECK-NEXT: }