diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -65,27 +65,27 @@ using LoopVector = SmallVector; +// Synchronize with Dependence::DVEntry::LT,EQ,GT from DependenceAnalysis +enum DepFlags { + LT = 1 << 0, + EQ = 1 << 1, + GT = 1 << 2, + Scalar = 1 << 3, + Ignore = 1 << 4, + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/Ignore) +}; + // TODO: Check if we can use a sparse matrix here. -using CharMatrix = std::vector>; +using CharMatrix = std::vector; } // end anonymous namespace // Maximum number of dependencies that can be handled in the dependency matrix. -static const unsigned MaxMemInstrCount = 100; +static const unsigned MaxMemInstrCount = 10; // Maximum loop depth supported. static const unsigned MaxLoopNestDepth = 10; -#ifdef DUMP_DEP_MATRICIES -static void printDepMatrix(CharMatrix &DepMatrix) { - for (auto &Row : DepMatrix) { - for (auto D : Row) - LLVM_DEBUG(dbgs() << D << " "); - LLVM_DEBUG(dbgs() << "\n"); - } -} -#endif - static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, Loop *L, DependenceInfo *DI, ScalarEvolution *SE) { @@ -114,11 +114,19 @@ LLVM_DEBUG(dbgs() << "Found " << MemInstr.size() << " Loads and Stores to analyze\n"); - ValueVector::iterator I, IE, J, JE; + if (MemInstr.size() > MaxMemInstrCount) { + LLVM_DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount + << " dependencies inside loop\n"); + return false; + } + + DepMatrix.resize(Level); + for (unsigned II = 0; II < Level; ++II) + DepMatrix[II] = static_cast(0); + ValueVector::iterator I, IE, J, JE; for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) { for (J = I, JE = MemInstr.end(); J != JE; ++J) { - std::vector Dep; Instruction *Src = cast(*I); Instruction *Dst = cast(*J); // Ignore Input dependencies. @@ -137,36 +145,19 @@ << " dependency between Src and Dst\n" << " Src:" << *Src << "\n Dst:" << *Dst << '\n'); unsigned Levels = D->getLevels(); - char Direction; - for (unsigned II = 1; II <= Levels; ++II) { - if (D->isScalar(II)) { - Direction = 'S'; - Dep.push_back(Direction); + unsigned II = 0; + for (; II < Levels; ++II) { + DepFlags &Lvl = DepMatrix[II]; + if (D->isScalar(II + 1)) { + Lvl |= Scalar; } else { - unsigned Dir = D->getDirection(II); - if (Dir == Dependence::DVEntry::LT || - Dir == Dependence::DVEntry::LE) - Direction = '<'; - else if (Dir == Dependence::DVEntry::GT || - Dir == Dependence::DVEntry::GE) - Direction = '>'; - else if (Dir == Dependence::DVEntry::EQ) - Direction = '='; - else - Direction = '*'; - Dep.push_back(Direction); + // DepFlags needs lower 3 bits of DepFlags for be the same as used + // by DA. + Lvl |= static_cast(D->getDirection(II + 1)); } } - while (Dep.size() != Level) { - Dep.push_back('I'); - } - - DepMatrix.push_back(Dep); - if (DepMatrix.size() > MaxMemInstrCount) { - LLVM_DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount - << " dependencies inside loop\n"); - return false; - } + for (; II < Level; ++II) + DepMatrix[II] |= Ignore; } } } @@ -178,37 +169,27 @@ // matrix by exchanging the two columns. static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx, unsigned ToIndx) { - for (unsigned I = 0, E = DepMatrix.size(); I < E; ++I) - std::swap(DepMatrix[I][ToIndx], DepMatrix[I][FromIndx]); + std::swap(DepMatrix[ToIndx], DepMatrix[FromIndx]); } // Checks if no dependence exist in the dependency matrix in Row before Column. -static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row, - unsigned Column) { +static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Column) { for (unsigned i = 0; i < Column; ++i) { - if (DepMatrix[Row][i] != '=' && DepMatrix[Row][i] != 'S' && - DepMatrix[Row][i] != 'I') + DepFlags Lvl = DepMatrix[i]; + if (Lvl & LT || Lvl & GT) return false; } return true; } -static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row, - unsigned OuterLoopId, char InnerDep, - char OuterDep) { +static bool validDepInterchange(CharMatrix &DepMatrix, unsigned OuterLoopId, + DepFlags InnerDep, DepFlags OuterDep) { if (InnerDep == OuterDep) return true; // It is legal to interchange if and only if after interchange no row has a // '>' direction as the leftmost non-'='. - - if (InnerDep == '=' || InnerDep == 'S' || InnerDep == 'I') - return true; - - if (InnerDep == '<') - return true; - - if (InnerDep == '>') { + if (InnerDep & GT) { // If OuterLoopId represents outermost loop then interchanging will make the // 1st dependency as '>' if (OuterLoopId == 0) @@ -217,13 +198,15 @@ // If all dependencies before OuterloopId are '=','S'or 'I'. Then // interchanging will result in this row having an outermost non '=' // dependency of '>' - if (!containsNoDependence(DepMatrix, Row, OuterLoopId)) + if (!containsNoDependence(DepMatrix, OuterLoopId)) return true; } - return false; + return true; } +static bool isAnydirectional(DepFlags F) { return (F & LT) && (F & GT); } + // Checks if it is legal to interchange 2 loops. // [Theorem] A permutation of the loops in a perfect nest is legal if and only // if the direction matrix, after the same permutation is applied to its @@ -231,16 +214,14 @@ static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, unsigned InnerLoopId, unsigned OuterLoopId) { - unsigned NumRows = DepMatrix.size(); // For each row check if it is valid to interchange. - for (unsigned Row = 0; Row < NumRows; ++Row) { - char InnerDep = DepMatrix[Row][InnerLoopId]; - char OuterDep = DepMatrix[Row][OuterLoopId]; - if (InnerDep == '*' || OuterDep == '*') - return false; - if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep, OuterDep)) - return false; - } + DepFlags InnerDep = DepMatrix[InnerLoopId]; + DepFlags OuterDep = DepMatrix[OuterLoopId]; + if (isAnydirectional(InnerDep) || isAnydirectional(OuterDep)) + return false; + if (!validDepInterchange(DepMatrix, OuterLoopId, InnerDep, OuterDep)) + return false; + return true; } @@ -467,10 +448,6 @@ LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n"); return false; } -#ifdef DUMP_DEP_MATRICIES - LLVM_DEBUG(dbgs() << "Dependence before interchange\n"); - printDepMatrix(DependencyMatrix); -#endif // Get the Outermost loop exit. BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock(); @@ -510,10 +487,6 @@ std::swap(LoopList[i - 1], LoopList[i]); // Update the DependencyMatrix interChangeDependencies(DependencyMatrix, i, i - 1); -#ifdef DUMP_DEP_MATRICIES - LLVM_DEBUG(dbgs() << "Dependence after interchange\n"); - printDepMatrix(DependencyMatrix); -#endif ChangedPerIter |= Interchanged; Changed |= Interchanged; } @@ -526,8 +499,7 @@ } bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId, - unsigned OuterLoopId, - std::vector> &DependencyMatrix, + unsigned OuterLoopId, CharMatrix &DependencyMatrix, const DenseMap &CostMap) { LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId << " and OuterLoopId = " << OuterLoopId << "\n"); @@ -1116,17 +1088,17 @@ // TODO: Improve this heuristic to catch more cases. // If the inner loop is loop independent or doesn't carry any dependency it is // profitable to move this to outer position. - for (auto &Row : DepMatrix) { - if (Row[InnerLoopId] != 'S' && Row[InnerLoopId] != 'I') - return false; - // TODO: We need to improve this heuristic. - if (Row[OuterLoopId] != '=') - return false; - } + if (DepMatrix[InnerLoopId] & ~(Scalar | Ignore)) + return false; + + // TODO: We need to improve this heuristic. + if (DepMatrix[OuterLoopId] & ~EQ) + return false; + // If outer loop has dependence and inner loop is loop independent then it is // profitable to interchange to enable parallelism. // If there are no dependences, interchanging will not improve anything. - return !DepMatrix.empty(); + return DepMatrix[InnerLoopId] != 0; } bool LoopInterchangeProfitability::isProfitable(