diff --git a/llvm/include/llvm/Analysis/LoopCacheAnalysis.h b/llvm/include/llvm/Analysis/LoopCacheAnalysis.h --- a/llvm/include/llvm/Analysis/LoopCacheAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopCacheAnalysis.h @@ -104,6 +104,13 @@ /// smaller than the cache line size \p CLS. bool isConsecutive(const Loop &L, unsigned CLS) const; + /// Retrieve the \p Index of the subscript corresponding to the given loop \p + /// L. Return true if the subscript index is succesfully located and false + /// otherwise. For example given the indexed reference 'A[i][2j+1][3k+2][l]', + /// the call 'getSubscriptIndex(loop-k,Index)' would fill \p Index with the + /// value 2 and return true. + bool getSubscriptIndex(const Loop &L, unsigned &Index) const; + /// Return the coefficient used in the rightmost dimension. const SCEV *getLastCoefficient() const; diff --git a/llvm/lib/Analysis/LoopCacheAnalysis.cpp b/llvm/lib/Analysis/LoopCacheAnalysis.cpp --- a/llvm/lib/Analysis/LoopCacheAnalysis.cpp +++ b/llvm/lib/Analysis/LoopCacheAnalysis.cpp @@ -97,16 +97,26 @@ return StepRec == &ElemSize; } -/// Compute the trip count for the given loop \p L. Return the SCEV expression -/// for the trip count or nullptr if it cannot be computed. -static const SCEV *computeTripCount(const Loop &L, ScalarEvolution &SE) { +/// Compute the trip count for the given loop \p L or assume a default value if +/// it is not a compile time constant. Return the SCEV expression for the trip +/// count. +static const SCEV *computeTripCount(const Loop &L, const SCEV &ElemSize, + ScalarEvolution &SE) { const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(&L); - if (isa(BackedgeTakenCount) || - !isa(BackedgeTakenCount)) - return nullptr; + const SCEV *TripCount = + (!isa(BackedgeTakenCount) && + isa(BackedgeTakenCount)) + ? SE.getAddExpr(BackedgeTakenCount, + SE.getOne(BackedgeTakenCount->getType())) + : nullptr; + + if (!TripCount) { + LLVM_DEBUG(dbgs() << "Trip count of loop " << L.getName() + << " could not be computed, using DefaultTripCount\n"); + TripCount = SE.getConstant(ElemSize.getType(), DefaultTripCount); + } - return SE.getAddExpr(BackedgeTakenCount, - SE.getOne(BackedgeTakenCount->getType())); + return TripCount; } //===----------------------------------------------------------------------===// @@ -270,22 +280,18 @@ return 1; } - const SCEV *TripCount = computeTripCount(L, SE); - if (!TripCount) { - LLVM_DEBUG(dbgs() << "Trip count of loop " << L.getName() - << " could not be computed, using DefaultTripCount\n"); - const SCEV *ElemSize = Sizes.back(); - TripCount = SE.getConstant(ElemSize->getType(), DefaultTripCount); - } + const SCEV *TripCount = computeTripCount(L, *Sizes.back(), SE); + assert(TripCount && "Expecting valid TripCount"); LLVM_DEBUG(dbgs() << "TripCount=" << *TripCount << "\n"); - // If the indexed reference is 'consecutive' the cost is - // (TripCount*Stride)/CLS, otherwise the cost is TripCount. - const SCEV *RefCost = TripCount; - + const SCEV *RefCost = nullptr; if (isConsecutive(L, CLS)) { + // If the indexed reference is 'consecutive' the cost is + // (TripCount*Stride)/CLS. const SCEV *Coeff = getLastCoefficient(); const SCEV *ElemSize = Sizes.back(); + assert(Coeff->getType() == ElemSize->getType() && + "Expecting the same type"); const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize); const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS); Type *WiderType = SE.getWiderType(Stride->getType(), TripCount->getType()); @@ -299,10 +305,33 @@ LLVM_DEBUG(dbgs().indent(4) << "Access is consecutive: RefCost=(TripCount*Stride)/CLS=" << *RefCost << "\n"); - } else + } else { + // If the indexed reference is not 'consecutive' the cost is equal to the + // number of iterations associated with the subscript corresponding to the + // loop. For example, given the indexed reference 'A[i][j][k]', and assuming + // the j-loop is in the innermost position, the cost would be equal to the + // iterations of the j-loop. + RefCost = TripCount; + + unsigned Index = 0; + bool Found = getSubscriptIndex(L, Index); + assert(Found && "Cound not locate a valid Index"); + + for (unsigned I = Index + 1; I < getNumSubscripts() - 1; ++I) { + const SCEVAddRecExpr *AR = dyn_cast(getSubscript(I)); + assert(AR && AR->getLoop() && "Expecting valid loop"); + const SCEV *TripCount = + computeTripCount(*AR->getLoop(), *Sizes.back(), SE); + Type *WiderType = + SE.getWiderType(RefCost->getType(), TripCount->getType()); + RefCost = SE.getMulExpr(SE.getNoopOrAnyExtend(RefCost, WiderType), + SE.getNoopOrAnyExtend(TripCount, WiderType)); + } + LLVM_DEBUG(dbgs().indent(4) - << "Access is not consecutive: RefCost=TripCount=" << *RefCost - << "\n"); + << "Access is not consecutive: RefCost=" << *RefCost << "\n"); + } + assert(RefCost && "Expecting a valid RefCost"); // Attempt to fold RefCost into a constant. if (auto ConstantCost = dyn_cast(RefCost)) @@ -420,6 +449,17 @@ return SE.isKnownPredicate(ICmpInst::ICMP_ULT, Stride, CacheLineSize); } +bool IndexedReference::getSubscriptIndex(const Loop &L, unsigned &Index) const { + for (auto Idx : seq(0, getNumSubscripts())) { + const SCEVAddRecExpr *AR = dyn_cast(getSubscript(Idx)); + if (AR && AR->getLoop() == &L) { + Index = Idx; + return true; + } + } + return false; +} + const SCEV *IndexedReference::getLastCoefficient() const { const SCEV *LastSubscript = getLastSubscript(); assert(isa(LastSubscript) && diff --git a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matvecmul.ll b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matvecmul.ll --- a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matvecmul.ll +++ b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/matvecmul.ll @@ -14,9 +14,9 @@ ; y[k+1][j][i][l] = y[k+1][j][i][l] + b[k][j][i][m][l]*x[k][j][i][m] ; } -; CHECK-DAG: Loop 'k_loop' has cost = 30000000000 -; CHECK-DAG: Loop 'j_loop' has cost = 30000000000 -; CHECK-DAG: Loop 'i_loop' has cost = 30000000000 +; CHECK-DAG: Loop 'k_loop' has cost = 10200000000000000 +; CHECK-DAG: Loop 'j_loop' has cost = 102000000000000 +; CHECK-DAG: Loop 'i_loop' has cost = 1020000000000 ; CHECK-DAG: Loop 'm_loop' has cost = 10700000000 ; CHECK-DAG: Loop 'l_loop' has cost = 1300000000