Index: llvm/lib/Analysis/LoopCacheAnalysis.cpp =================================================================== --- llvm/lib/Analysis/LoopCacheAnalysis.cpp +++ llvm/lib/Analysis/LoopCacheAnalysis.cpp @@ -90,7 +90,11 @@ if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L)) return false; - return AR->getStepRecurrence(SE) == &ElemSize; + const SCEV *StepRec = AR->getStepRecurrence(SE); + if (StepRec && SE.isKnownNegative(StepRec)) + StepRec = SE.getNegativeSCEV(StepRec); + + return StepRec == &ElemSize; } /// Compute the trip count for the given loop \p L. Return the SCEV expression @@ -285,10 +289,13 @@ const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize); const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS); Type *WiderType = SE.getWiderType(Stride->getType(), TripCount->getType()); - Stride = SE.getNoopOrSignExtend(Stride, WiderType); + if (SE.isKnownNegative(Stride)) + Stride = SE.getNegativeSCEV(Stride); + Stride = SE.getNoopOrAnyExtend(Stride, WiderType); TripCount = SE.getNoopOrAnyExtend(TripCount, WiderType); const SCEV *Numerator = SE.getMulExpr(Stride, TripCount); RefCost = SE.getUDivExpr(Numerator, CacheLineSize); + LLVM_DEBUG(dbgs().indent(4) << "Access is consecutive: RefCost=(TripCount*Stride)/CLS=" << *RefCost << "\n"); @@ -349,6 +356,29 @@ return false; } + // The array may be accessed in reverse, for example: + // + // typedef struct _Handleitem { + // struct _Handleitem *next; + // } Handleitem; + // + // void simpleLoopFunc(Handleitem *(blocks[1024])) { + // int i; + // for (i=1023; i > 0; i--) + // blocks[i]=0; + // } + // + // In this case, reconstruct the access function using the absolute value + // of the step recurrence. + const SCEVAddRecExpr *AccessFnAR = dyn_cast(AccessFn); + const SCEV *StepRec = AccessFnAR ? AccessFnAR->getStepRecurrence(SE) : nullptr; + + if (StepRec && SE.isKnownNegative(StepRec)) { + AccessFn = SE.getAddRecExpr(AccessFnAR->getStart(), + SE.getNegativeSCEV(StepRec), + AccessFnAR->getLoop(), + AccessFnAR->getNoWrapFlags()); + } const SCEV *Div = SE.getUDivExactExpr(AccessFn, ElemSize); Subscripts.push_back(Div); Sizes.push_back(ElemSize); @@ -396,6 +426,7 @@ const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize); const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS); + Stride = SE.isKnownNegative(Stride) ? SE.getNegativeSCEV(Stride) : Stride; return SE.isKnownPredicate(ICmpInst::ICMP_ULT, Stride, CacheLineSize); } Index: llvm/test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll =================================================================== --- llvm/test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll +++ llvm/test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll @@ -31,5 +31,122 @@ ; Exit blocks for.end: ; preds = %for.cond ret void +} + + + +; Check IndexedReference::computeRefCost can handle negative stride + +; CHECK: Loop 'for.neg.cond' has cost = 64 + +define void @handle_to_ptr_neg_stride(%struct._Handleitem** %blocks) { +; Preheader: +entry: + br label %for.neg.cond + +; Loop: +for.neg.cond: ; preds = %for.neg.body, %entry + %i.0 = phi i32 [ 1023, %entry ], [ %dec, %for.neg.body ] + %cmp = icmp sgt i32 %i.0, 0 + br i1 %cmp, label %for.neg.body, label %for.neg.end + +for.neg.body: ; preds = %for.neg.cond + %idxprom = zext i32 %i.0 to i64 + %arrayidx = getelementptr inbounds %struct._Handleitem*, %struct._Handleitem** %blocks, i64 %idxprom + store %struct._Handleitem* null, %struct._Handleitem** %arrayidx, align 8 + %dec = add nsw i32 %i.0, -1 + br label %for.neg.cond + +; Exit blocks +for.neg.end: ; preds = %for.neg.cond + ret void +} + + + +; for (int i = 1024; i > 0; i--) +; B[i] = B[1024 - i]; + +; CHECK: Loop 'for.cond2' has cost = 64 +define void @Test2(%struct._Handleitem** %B) { +entry: + br label %for.cond2 + +for.cond2: ; preds = %for.body, %entry + %i.0 = phi i32 [ 1024, %entry ], [ %dec, %for.body ] + %cmp = icmp sgt i32 %i.0, 0 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %sub = sub nsw i32 1024, %i.0 + %idxprom = sext i32 %sub to i64 + %arrayidx = getelementptr inbounds %struct._Handleitem*, %struct._Handleitem** %B, i64 %idxprom + %0 = load %struct._Handleitem*, %struct._Handleitem** %arrayidx, align 8 + %idxprom1 = sext i32 %i.0 to i64 + %arrayidx2 = getelementptr inbounds %struct._Handleitem*, %struct._Handleitem** %B, i64 %idxprom1 + store %struct._Handleitem* %0, %struct._Handleitem** %arrayidx2, align 8 + %dec = add nsw i32 %i.0, -1 + br label %for.cond2 + +for.end: ; preds = %for.cond + ret void +} + + + +; for (i = 1024; i > 0; i--) +; C[i] = C[i]; + +; CHECK: Loop 'for.cond3' has cost = 64 +define void @Test3(%struct._Handleitem** %C) { +entry: + br label %for.cond3 + +for.cond3: ; preds = %for.body, %entry + %i.0 = phi i32 [ 1024, %entry ], [ %dec, %for.body ] + %cmp = icmp sgt i32 %i.0, 0 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %idxprom = sext i32 %i.0 to i64 + %arrayidx = getelementptr inbounds %struct._Handleitem*, %struct._Handleitem** %C, i64 %idxprom + %0 = load %struct._Handleitem*, %struct._Handleitem** %arrayidx, align 8 + %idxprom1 = sext i32 %i.0 to i64 + %arrayidx2 = getelementptr inbounds %struct._Handleitem*, %struct._Handleitem** %C, i64 %idxprom1 + store %struct._Handleitem* %0, %struct._Handleitem** %arrayidx2, align 8 + %dec = add nsw i32 %i.0, -1 + br label %for.cond3 + +for.end: ; preds = %for.cond + ret void +} + + + +; for (i = 0; i < 1024; i++) +; D[i] = D[i]; + +; CHECK: Loop 'for.cond4' has cost = 64 +define void @Test4(%struct._Handleitem** %D) { +entry: + br label %for.cond4 +for.cond4: ; preds = %for.body, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %cmp = icmp slt i32 %i.0, 1024 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %idxprom = sext i32 %i.0 to i64 + %arrayidx = getelementptr inbounds %struct._Handleitem*, %struct._Handleitem** %D, i64 %idxprom + %0 = load %struct._Handleitem*, %struct._Handleitem** %arrayidx, align 8 + %idxprom1 = sext i32 %i.0 to i64 + %arrayidx2 = getelementptr inbounds %struct._Handleitem*, %struct._Handleitem** %D, i64 %idxprom1 + store %struct._Handleitem* %0, %struct._Handleitem** %arrayidx2, align 8 + %inc = add nsw i32 %i.0, 1 + br label %for.cond4 + +for.end: ; preds = %for.cond + ret void } +