Index: polly/trunk/include/polly/ScopBuilder.h =================================================================== --- polly/trunk/include/polly/ScopBuilder.h +++ polly/trunk/include/polly/ScopBuilder.h @@ -235,6 +235,12 @@ /// separator is found. void buildSequentialBlockStmts(BasicBlock *BB); + /// Create one or more ScopStmts for @p BB using equivalence classes. + /// + /// Instructions of a basic block that belong to the same equivalence class + /// are added to the same statement. + void buildEqivClassBlockStmts(BasicBlock *BB); + /// Create ScopStmt for all BBs and non-affine subregions of @p SR. /// /// @param SR A subregion of @p R. Index: polly/trunk/lib/Analysis/ScopBuilder.cpp =================================================================== --- polly/trunk/lib/Analysis/ScopBuilder.cpp +++ polly/trunk/lib/Analysis/ScopBuilder.cpp @@ -25,6 +25,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/EquivalenceClasses.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -102,14 +103,16 @@ cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::cat(PollyCategory)); -enum class GranularityChoice { BasicBlocks }; +enum class GranularityChoice { BasicBlocks, ScalarIndepependence }; static cl::opt StmtGranularity( "polly-stmt-granularity", cl::desc( "Algorithm to use for splitting basic blocks into multiple statements"), cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb", - "One statement per basic block")), + "One statement per basic block"), + clEnumValN(GranularityChoice::ScalarIndepependence, + "scalar-indep", "Scalar independence heuristic")), cl::init(GranularityChoice::BasicBlocks), cl::cat(PollyCategory)); void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI, @@ -701,6 +704,178 @@ scop->addScopStmt(BB, SurroundingLoop, Instructions, Count); } +/// Is @p Inst an ordered instruction? +/// +/// An unordered instruction is an instruction, such that a sequence of +/// unordered instructions can be permuted without changing semantics. Any +/// instruction for which this is not always the case is ordered. +static bool isOrderedInstruction(Instruction *Inst) { + return Inst->mayHaveSideEffects() || Inst->mayReadOrWriteMemory(); +} + +/// Join instructions to the same statement if one uses the scalar result of the +/// other. +static void joinOperandTree(EquivalenceClasses &UnionFind, + ArrayRef ModeledInsts) { + for (Instruction *Inst : ModeledInsts) { + if (isa(Inst)) + continue; + + for (Use &Op : Inst->operands()) { + Instruction *OpInst = dyn_cast(Op.get()); + if (!OpInst) + continue; + + // Check if OpInst is in the BB and is a modeled instruction. + auto OpVal = UnionFind.findValue(OpInst); + if (OpVal == UnionFind.end()) + continue; + + UnionFind.unionSets(Inst, OpInst); + } + } +} + +/// Join instructions that are used as incoming value in successor PHIs into the +/// epilogue. +static void +joinIncomingPHIValuesIntoEpilogue(EquivalenceClasses &UnionFind, + ArrayRef ModeledInsts, + BasicBlock *BB) { + for (BasicBlock *Succ : successors(BB)) { + for (Instruction &SuccInst : *Succ) { + PHINode *SuccPHI = dyn_cast(&SuccInst); + if (!SuccPHI) + break; + + Value *IncomingVal = SuccPHI->getIncomingValueForBlock(BB); + Instruction *IncomingInst = dyn_cast(IncomingVal); + if (!IncomingInst) + continue; + if (IncomingInst->getParent() != BB) + continue; + if (UnionFind.findValue(IncomingInst) == UnionFind.end()) + continue; + + UnionFind.unionSets(nullptr, IncomingInst); + } + } +} + +/// Ensure that the order of ordered instructions does not change. +/// +/// If we encounter an ordered instruction enclosed in instructions belonging to +/// a different statement (which might as well contain ordered instructions, but +/// this is not tested here), join them. +static void +joinOrderedInstructions(EquivalenceClasses &UnionFind, + ArrayRef ModeledInsts) { + SetVector SeenLeaders; + for (Instruction *Inst : ModeledInsts) { + if (!isOrderedInstruction(Inst)) + continue; + + Instruction *Leader = UnionFind.getLeaderValue(Inst); + bool Inserted = SeenLeaders.insert(Leader); + if (Inserted) + continue; + + // Merge statements to close holes. Say, we have already seen statements A + // and B, in this order. Then we see an instruction of A again and we would + // see the pattern "A B A". This function joins all statements until the + // only seen occurrence of A. + for (Instruction *Prev : reverse(SeenLeaders)) { + // Items added to 'SeenLeaders' are leaders, but may have lost their + // leadership status when merged into another statement. + Instruction *PrevLeader = UnionFind.getLeaderValue(SeenLeaders.back()); + if (PrevLeader == Leader) + break; + UnionFind.unionSets(Prev, Leader); + } + } +} + +/// Also ensure that the epilogue is the last statement relative to all ordered +/// instructions. +/// +/// This is basically joinOrderedInstructions() but using the epilogue as +/// 'ordered instruction'. +static void joinAllAfterEpilogue(EquivalenceClasses &UnionFind, + ArrayRef ModeledInsts) { + bool EpilogueSeen = false; + for (Instruction *Inst : ModeledInsts) { + auto PHIWritesLeader = UnionFind.findLeader(nullptr); + auto InstLeader = UnionFind.findLeader(Inst); + + if (PHIWritesLeader == InstLeader) + EpilogueSeen = true; + + if (!isOrderedInstruction(Inst)) + continue; + + if (EpilogueSeen) + UnionFind.unionSets(PHIWritesLeader, InstLeader); + } +} + +void ScopBuilder::buildEqivClassBlockStmts(BasicBlock *BB) { + Loop *L = LI.getLoopFor(BB); + + // Extracting out modeled instructions saves us from checking + // shouldModelInst() repeatedly. + SmallVector ModeledInsts; + EquivalenceClasses UnionFind; + for (Instruction &Inst : *BB) { + if (!shouldModelInst(&Inst, L)) + continue; + ModeledInsts.push_back(&Inst); + UnionFind.insert(&Inst); + } + + // 'nullptr' represents the last statement for a basic block. It contains no + // instructions, but holds the PHI write accesses for successor basic blocks. + // If a PHI has an incoming value defined in this BB, it can also be merged + // with other statements. + // TODO: We wouldn't need this if we would add PHIWrites into the statement + // that defines the incoming value (if in the BB) instead of always the last, + // so we could unconditionally always add a last statement. + UnionFind.insert(nullptr); + + joinOperandTree(UnionFind, ModeledInsts); + joinIncomingPHIValuesIntoEpilogue(UnionFind, ModeledInsts, BB); + joinOrderedInstructions(UnionFind, ModeledInsts); + joinAllAfterEpilogue(UnionFind, ModeledInsts); + + // The list of instructions for statement (statement represented by the leader + // instruction). The order of statements instructions is reversed such that + // the epilogue is first. This makes it easier to ensure that the epilogue is + // the last statement. + MapVector> LeaderToInstList; + + // Ensure that the epilogue is last. + LeaderToInstList[nullptr]; + + // Collect the instructions of all leaders. UnionFind's member iterator + // unfortunately are not in any specific order. + for (Instruction &Inst : reverse(*BB)) { + auto LeaderIt = UnionFind.findLeader(&Inst); + if (LeaderIt == UnionFind.member_end()) + continue; + + std::vector &InstList = LeaderToInstList[*LeaderIt]; + InstList.push_back(&Inst); + } + + // Finally build the statements. + int Count = 0; + for (auto &Instructions : reverse(LeaderToInstList)) { + std::vector &InstList = Instructions.second; + std::reverse(InstList.begin(), InstList.end()); + scop->addScopStmt(BB, L, std::move(InstList), Count); + Count += 1; + } +} + void ScopBuilder::buildStmts(Region &SR) { if (scop->isNonAffineSubRegion(&SR)) { std::vector Instructions; @@ -722,6 +897,9 @@ case GranularityChoice::BasicBlocks: buildSequentialBlockStmts(BB); break; + case GranularityChoice::ScalarIndepependence: + buildEqivClassBlockStmts(BB); + break; } } } Index: polly/trunk/test/ScopInfo/granularity_scalar-indep.ll =================================================================== --- polly/trunk/test/ScopInfo/granularity_scalar-indep.ll +++ polly/trunk/test/ScopInfo/granularity_scalar-indep.ll @@ -0,0 +1,68 @@ +; RUN: opt %loadPolly -polly-stmt-granularity=scalar-indep -polly-print-instructions -polly-scops -analyze < %s | FileCheck %s -match-full-lines +; +; Split a block into two independent statements that share no scalar. +; This case has the instructions of the two statements interleaved, such that +; splitting the BasicBlock in the middle would cause a scalar dependency. +; +; for (int j = 0; j < n; j += 1) { +; body: +; double valA = A[0]; +; double valB = 21.0 + 21.0; +; A[0] = valA; +; B[0] = valB; +; } +; +define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %body, label %exit + + body: + %valA = load double, double* %A + %valB = fadd double 21.0, 21.0 + store double %valA, double* %A + store double %valB, double* %B + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: Statements { +; CHECK-NEXT: Stmt_body +; CHECK-NEXT: Domain := +; CHECK-NEXT: [n] -> { Stmt_body[i0] : 0 <= i0 < n }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: [n] -> { Stmt_body[i0] -> [i0, 0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_body[i0] -> MemRef_A[0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_body[i0] -> MemRef_A[0] }; +; CHECK-NEXT: Instructions { +; CHECK-NEXT: %valA = load double, double* %A +; CHECK-NEXT: store double %valA, double* %A +; CHECK-NEXT: } +; CHECK-NEXT: Stmt_body1 +; CHECK-NEXT: Domain := +; CHECK-NEXT: [n] -> { Stmt_body1[i0] : 0 <= i0 < n }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: [n] -> { Stmt_body1[i0] -> [i0, 1] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_body1[i0] -> MemRef_B[0] }; +; CHECK-NEXT: Instructions { +; CHECK-NEXT: %valB = fadd double 2.100000e+01, 2.100000e+01 +; CHECK-NEXT: store double %valB, double* %B +; CHECK-NEXT: } +; CHECK-NEXT: } Index: polly/trunk/test/ScopInfo/granularity_scalar-indep_epilogue.ll =================================================================== --- polly/trunk/test/ScopInfo/granularity_scalar-indep_epilogue.ll +++ polly/trunk/test/ScopInfo/granularity_scalar-indep_epilogue.ll @@ -0,0 +1,68 @@ +; RUN: opt %loadPolly -polly-stmt-granularity=scalar-indep -polly-print-instructions -polly-scops -analyze < %s | FileCheck %s -match-full-lines +; +; Split a block into two independent statements that share no scalar. +; This case has an independent statement just for PHI writes. +; +; for (int j = 0; j < n; j += 1) { +; bodyA: +; double valA = A[0]; +; A[0] = valA; +; +; bodyB: +; phi = 42.0; +; } +; +define void @func(i32 %n, double* noalias nonnull %A) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %bodyA, label %exit + + bodyA: + %valA = load double, double* %A + store double %valA, double* %A + br label %bodyB + + bodyB: + %phi = phi double [42.0, %bodyA] + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: Statements { +; CHECK-NEXT: Stmt_bodyA +; CHECK-NEXT: Domain := +; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] : 0 <= i0 < n }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> [i0, 0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_A[0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_A[0] }; +; CHECK-NEXT: Instructions { +; CHECK-NEXT: %valA = load double, double* %A +; CHECK-NEXT: store double %valA, double* %A +; CHECK-NEXT: } +; CHECK-NEXT: Stmt_bodyA1 +; CHECK-NEXT: Domain := +; CHECK-NEXT: [n] -> { Stmt_bodyA1[i0] : 0 <= i0 < n }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: [n] -> { Stmt_bodyA1[i0] -> [i0, 1] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [n] -> { Stmt_bodyA1[i0] -> MemRef_phi__phi[] }; +; CHECK-NEXT: Instructions { +; CHECK-NEXT: } +; CHECK-NEXT: } Index: polly/trunk/test/ScopInfo/granularity_scalar-indep_epilogue_last.ll =================================================================== --- polly/trunk/test/ScopInfo/granularity_scalar-indep_epilogue_last.ll +++ polly/trunk/test/ScopInfo/granularity_scalar-indep_epilogue_last.ll @@ -0,0 +1,72 @@ +; RUN: opt %loadPolly -polly-stmt-granularity=scalar-indep -polly-print-instructions -polly-scops -analyze < %s | FileCheck %s -match-full-lines +; +; Check that the statement where the PHI writes are in always comes last. +; Otherwise we'd need to introduce a scalar dependency from the statement +; defining a value to the last statement of a basic block. +; +; for (int j = 0; j < n; j += 1) { +; bodyA: +; double valA = A[0]; +; A[0] = valA; +; double valB = B[0]; +; B[0] = valB; +; +; bodyB: +; phi = valA; +; } +; +define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %bodyA, label %exit + + bodyA: + %valA = load double, double* %A + store double %valA, double* %A + %valB = load double, double* %B + store double %valB, double* %B + br label %bodyB + + bodyB: + %phi = phi double [%valA, %bodyA] + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: Statements { +; CHECK-NEXT: Stmt_bodyA +; CHECK-NEXT: Domain := +; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] : 0 <= i0 < n }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> [i0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_A[0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_A[0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_B[0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_B[0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_phi__phi[] }; +; CHECK-NEXT: Instructions { +; CHECK-NEXT: %valA = load double, double* %A +; CHECK-NEXT: store double %valA, double* %A +; CHECK-NEXT: %valB = load double, double* %B +; CHECK-NEXT: store double %valB, double* %B +; CHECK-NEXT: } +; CHECK-NEXT: } Index: polly/trunk/test/ScopInfo/granularity_scalar-indep_noepilogue.ll =================================================================== --- polly/trunk/test/ScopInfo/granularity_scalar-indep_noepilogue.ll +++ polly/trunk/test/ScopInfo/granularity_scalar-indep_noepilogue.ll @@ -0,0 +1,61 @@ +; RUN: opt %loadPolly -polly-stmt-granularity=scalar-indep -polly-print-instructions -polly-scops -analyze < %s | FileCheck %s -match-full-lines +; +; This case has no explicit epilogue for PHI writes because it would +; have a scalar dependency to the previous statement. +; +; for (int j = 0; j < n; j += 1) { +; bodyA: +; double valA = A[0]; +; A[0] = valA; +; +; bodyB: +; phi = valA; +; } +; +define void @func(i32 %n, double* noalias nonnull %A) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %bodyA, label %exit + + bodyA: + %valA = load double, double* %A + store double %valA, double* %A + br label %bodyB + + bodyB: + %phi = phi double [%valA, %bodyA] + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: Statements { +; CHECK-NEXT: Stmt_bodyA +; CHECK-NEXT: Domain := +; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] : 0 <= i0 < n }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> [i0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_A[0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_A[0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_phi__phi[] }; +; CHECK-NEXT: Instructions { +; CHECK-NEXT: %valA = load double, double* %A +; CHECK-NEXT: store double %valA, double* %A +; CHECK-NEXT: } +; CHECK-NEXT: } Index: polly/trunk/test/ScopInfo/granularity_scalar-indep_ordered.ll =================================================================== --- polly/trunk/test/ScopInfo/granularity_scalar-indep_ordered.ll +++ polly/trunk/test/ScopInfo/granularity_scalar-indep_ordered.ll @@ -0,0 +1,62 @@ +; RUN: opt %loadPolly -polly-stmt-granularity=scalar-indep -polly-print-instructions -polly-scops -analyze < %s | FileCheck %s -match-full-lines +; +; This case cannot be split into two statements because the order of +; loads and store would be violated. +; +; for (int j = 0; j < n; j += 1) { +; body: +; double valA = A[0]; +; double valB = B[0]; +; A[0] = valA; +; A[0] = valB; +; } +; +define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %body, label %exit + + body: + %valA = load double, double* %A + %valB = load double, double* %B + store double %valA, double* %A + store double %valB, double* %A + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: Statements { +; CHECK-NEXT: Stmt_body +; CHECK-NEXT: Domain := +; CHECK-NEXT: [n] -> { Stmt_body[i0] : 0 <= i0 < n }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: [n] -> { Stmt_body[i0] -> [i0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_body[i0] -> MemRef_A[0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_body[i0] -> MemRef_B[0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_body[i0] -> MemRef_A[0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_body[i0] -> MemRef_A[0] }; +; CHECK-NEXT: Instructions { +; CHECK-NEXT: %valA = load double, double* %A +; CHECK-NEXT: %valB = load double, double* %B +; CHECK-NEXT: store double %valA, double* %A +; CHECK-NEXT: store double %valB, double* %A +; CHECK-NEXT: } +; CHECK-NEXT: }