Index: llvm/include/llvm/Analysis/LoopCacheAnalysis.h =================================================================== --- llvm/include/llvm/Analysis/LoopCacheAnalysis.h +++ llvm/include/llvm/Analysis/LoopCacheAnalysis.h @@ -108,8 +108,9 @@ /// Return true if the indexed reference is 'consecutive' in loop \p L. /// An indexed reference is 'consecutive' if the only coefficient that uses /// the loop induction variable is the rightmost one, and the access stride is - /// smaller than the cache line size \p CLS. - bool isConsecutive(const Loop &L, unsigned CLS) const; + /// smaller than the cache line size \p CLS. Provide a valid \p Stride value + /// if the indexed reference is 'consecutive'. + bool isConsecutive(const Loop &L, const SCEV *&Stride, unsigned CLS) const; /// Retrieve the index of the subscript corresponding to the given loop \p /// L. Return a zero-based positive index if the subscript index is Index: llvm/lib/Analysis/LoopCacheAnalysis.cpp =================================================================== --- llvm/lib/Analysis/LoopCacheAnalysis.cpp +++ llvm/lib/Analysis/LoopCacheAnalysis.cpp @@ -289,18 +289,14 @@ LLVM_DEBUG(dbgs() << "TripCount=" << *TripCount << "\n"); const SCEV *RefCost = nullptr; - if (isConsecutive(L, CLS)) { + const SCEV *Stride = nullptr; + if (isConsecutive(L, Stride, CLS)) { // If the indexed reference is 'consecutive' the cost is // (TripCount*Stride)/CLS. 
- const SCEV *Coeff = getLastCoefficient(); - const SCEV *ElemSize = Sizes.back(); - assert(Coeff->getType() == ElemSize->getType() && - "Expecting the same type"); - const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize); + assert(Stride != nullptr && + "Stride should not be null for consecutive access!"); Type *WiderType = SE.getWiderType(Stride->getType(), TripCount->getType()); const SCEV *CacheLineSize = SE.getConstant(WiderType, CLS); - if (SE.isKnownNegative(Stride)) - Stride = SE.getNegativeSCEV(Stride); Stride = SE.getNoopOrAnyExtend(Stride, WiderType); TripCount = SE.getNoopOrAnyExtend(TripCount, WiderType); const SCEV *Numerator = SE.getMulExpr(Stride, TripCount); @@ -464,7 +460,8 @@ return allCoeffForLoopAreZero; } -bool IndexedReference::isConsecutive(const Loop &L, unsigned CLS) const { +bool IndexedReference::isConsecutive(const Loop &L, const SCEV *&Stride, + unsigned CLS) const { // The indexed reference is 'consecutive' if the only coefficient that uses // the loop induction variable is the last one... const SCEV *LastSubscript = Subscripts.back(); @@ -478,7 +475,11 @@ // ...and the access stride is less than the cache line size. const SCEV *Coeff = getLastCoefficient(); const SCEV *ElemSize = Sizes.back(); - const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize); + Type *WiderType = SE.getWiderType(Coeff->getType(), ElemSize->getType()); + // TODO: Extend more explicitly based on whether Coeff and ElemSize are + // positive or negative. + Stride = SE.getMulExpr(SE.getNoopOrSignExtend(Coeff, WiderType), + SE.getNoopOrSignExtend(ElemSize, WiderType)); const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS); Stride = SE.isKnownNegative(Stride) ? 
SE.getNegativeSCEV(Stride) : Stride; Index: llvm/test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll =================================================================== --- llvm/test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll +++ llvm/test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes='print<loop-cache-cost>' -disable-output 2>&1 | FileCheck %s +; RUN: opt < %s -opaque-pointers -passes='print<loop-cache-cost>' -disable-output 2>&1 | FileCheck %s target datalayout = "e-m:e-i64:64-n32:64" target triple = "powerpc64le-unknown-linux-gnu" @@ -33,7 +33,54 @@ ret void } +; Check IndexedReference::computeRefCost can handle type differences between +; Coeff and ElemSize. + +; CHECK: Loop 'for.cond' has cost = 100000000 +; CHECK: Loop 'for.cond1' has cost = 1000000 +; CHECK: Loop 'for.cond5' has cost = 30000 + +@data = external dso_local global [2 x [4 x [18 x i32]]], align 1 + +define dso_local void @handle_to_ptr_2(i1 %b0, i1 %b1, i1 %b2) { +entry: + br label %for.cond + +for.cond: + %i.0 = phi i16 [ 0, %entry ], [ %inc18, %for.inc17 ] + %idxprom = zext i16 %i.0 to i32 + br i1 %b2, label %for.end19, label %for.cond1 + +for.cond1: + %j.0 = phi i16 [ %inc15, %for.inc14 ], [ 0, %for.cond ] + br i1 %b1, label %for.inc17, label %for.cond5.preheader +for.cond5.preheader: + %idxprom10 = zext i16 %j.0 to i32 + br label %for.cond5 + +for.cond5: + %k.0 = phi i16 [ %inc, %for.inc ], [ 0, %for.cond5.preheader ] + br i1 %b0, label %for.inc14, label %for.inc + +for.inc: + %idxprom12 = zext i16 %k.0 to i32 + %arrayidx13 = getelementptr inbounds [2 x [4 x [18 x i32]]], ptr @data, i32 0, i32 %idxprom, i32 %idxprom10, i32 %idxprom12 + store i32 7, ptr %arrayidx13, align 1 + %inc = add nuw nsw i16 %k.0, 1 + br label %for.cond5 + +for.inc14: + %inc15 = add nuw nsw i16 %j.0, 1 + br label %for.cond1 + +for.inc17: + %inc18 = add nuw nsw i16 %i.0, 1 + br label %for.cond + +for.end19: + ret void +} ; Check IndexedReference::computeRefCost can handle negative stride Index: 
llvm/test/Analysis/LoopCacheAnalysis/compute-cost.ll =================================================================== --- llvm/test/Analysis/LoopCacheAnalysis/compute-cost.ll +++ llvm/test/Analysis/LoopCacheAnalysis/compute-cost.ll @@ -35,7 +35,56 @@ ret void } +; Check IndexedReference::computeRefCost can handle type differences between +; Coeff and ElemSize. + +; SMALLER-CACHELINE: Loop 'for.cond' has cost = 100000000 +; SMALLER-CACHELINE: Loop 'for.cond1' has cost = 1000000 +; SMALLER-CACHELINE: Loop 'for.cond5' has cost = 120000 +; LARGER-CACHELINE: Loop 'for.cond' has cost = 100000000 +; LARGER-CACHELINE: Loop 'for.cond1' has cost = 1000000 +; LARGER-CACHELINE: Loop 'for.cond5' has cost = 10000 +@data = external dso_local global [2 x [4 x [18 x i32]]], align 1 + +define dso_local void @handle_to_ptr_2(i1 %b0, i1 %b1, i1 %b2) { +entry: + br label %for.cond + +for.cond: + %i.0 = phi i16 [ 0, %entry ], [ %inc18, %for.inc17 ] + %idxprom = zext i16 %i.0 to i32 + br i1 %b2, label %for.end19, label %for.cond1 + +for.cond1: + %j.0 = phi i16 [ %inc15, %for.inc14 ], [ 0, %for.cond ] + br i1 %b1, label %for.inc17, label %for.cond5.preheader + +for.cond5.preheader: + %idxprom10 = zext i16 %j.0 to i32 + br label %for.cond5 +for.cond5: + %k.0 = phi i16 [ %inc, %for.inc ], [ 0, %for.cond5.preheader ] + br i1 %b0, label %for.inc14, label %for.inc + +for.inc: + %idxprom12 = zext i16 %k.0 to i32 + %arrayidx13 = getelementptr inbounds [2 x [4 x [18 x i32]]], ptr @data, i32 0, i32 %idxprom, i32 %idxprom10, i32 %idxprom12 + store i32 7, ptr %arrayidx13, align 1 + %inc = add nuw nsw i16 %k.0, 1 + br label %for.cond5 + +for.inc14: + %inc15 = add nuw nsw i16 %j.0, 1 + br label %for.cond1 + +for.inc17: + %inc18 = add nuw nsw i16 %i.0, 1 + br label %for.cond + +for.end19: + ret void +} ; Check IndexedReference::computeRefCost can handle negative stride