diff --git a/llvm/include/llvm/Analysis/LoopCacheAnalysis.h b/llvm/include/llvm/Analysis/LoopCacheAnalysis.h --- a/llvm/include/llvm/Analysis/LoopCacheAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopCacheAnalysis.h @@ -108,8 +108,9 @@ /// Return true if the indexed reference is 'consecutive' in loop \p L. /// An indexed reference is 'consecutive' if the only coefficient that uses /// the loop induction variable is the rightmost one, and the access stride is - /// smaller than the cache line size \p CLS. - bool isConsecutive(const Loop &L, unsigned CLS) const; + /// smaller than the cache line size \p CLS. Provide a valid \p Stride value + /// if the indexed reference is 'consecutive'. + bool isConsecutive(const Loop &L, const SCEV *&Stride, unsigned CLS) const; /// Retrieve the index of the subscript corresponding to the given loop \p /// L. Return a zero-based positive index if the subscript index is diff --git a/llvm/lib/Analysis/LoopCacheAnalysis.cpp b/llvm/lib/Analysis/LoopCacheAnalysis.cpp --- a/llvm/lib/Analysis/LoopCacheAnalysis.cpp +++ b/llvm/lib/Analysis/LoopCacheAnalysis.cpp @@ -289,18 +289,12 @@ LLVM_DEBUG(dbgs() << "TripCount=" << *TripCount << "\n"); const SCEV *RefCost = nullptr; - if (isConsecutive(L, CLS)) { + const SCEV *Stride = nullptr; + if (isConsecutive(L, Stride, CLS)) { // If the indexed reference is 'consecutive' the cost is // (TripCount*Stride)/CLS. 
- const SCEV *Coeff = getLastCoefficient(); - const SCEV *ElemSize = Sizes.back(); - assert(Coeff->getType() == ElemSize->getType() && - "Expecting the same type"); - const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize); Type *WiderType = SE.getWiderType(Stride->getType(), TripCount->getType()); const SCEV *CacheLineSize = SE.getConstant(WiderType, CLS); - if (SE.isKnownNegative(Stride)) - Stride = SE.getNegativeSCEV(Stride); Stride = SE.getNoopOrAnyExtend(Stride, WiderType); TripCount = SE.getNoopOrAnyExtend(TripCount, WiderType); const SCEV *Numerator = SE.getMulExpr(Stride, TripCount); @@ -464,7 +458,8 @@ return allCoeffForLoopAreZero; } -bool IndexedReference::isConsecutive(const Loop &L, unsigned CLS) const { +bool IndexedReference::isConsecutive(const Loop &L, const SCEV *&Stride, + unsigned CLS) const { // The indexed reference is 'consecutive' if the only coefficient that uses // the loop induction variable is the last one... const SCEV *LastSubscript = Subscripts.back(); @@ -478,7 +473,9 @@ // ...and the access stride is less than the cache line size. const SCEV *Coeff = getLastCoefficient(); const SCEV *ElemSize = Sizes.back(); - const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize); + Type *WiderType = SE.getWiderType(Coeff->getType(), ElemSize->getType()); + Stride = SE.getMulExpr(SE.getNoopOrAnyExtend(Coeff, WiderType), + SE.getNoopOrAnyExtend(ElemSize, WiderType)); const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS); Stride = SE.isKnownNegative(Stride) ? 
SE.getNegativeSCEV(Stride) : Stride; diff --git a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll --- a/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll +++ b/llvm/test/Analysis/LoopCacheAnalysis/PowerPC/compute-cost.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes='print<loop-cache-cost>' -disable-output 2>&1 | FileCheck %s +; RUN: opt < %s -opaque-pointers -passes='print<loop-cache-cost>' -disable-output 2>&1 | FileCheck %s target datalayout = "e-m:e-i64:64-n32:64" target triple = "powerpc64le-unknown-linux-gnu" @@ -33,7 +33,54 @@ ret void } +; Check IndexedReference::computeRefCost can handle type differences between +; Coeff and ElemSize. + +; CHECK: Loop 'for.cond' has cost = 100000000 +; CHECK: Loop 'for.cond1' has cost = 1000000 +; CHECK: Loop 'for.cond5' has cost = 30000 + +@data = external dso_local global [2 x [4 x [18 x i32]]], align 1 + +define dso_local void @handle_to_ptr_2(i1 %b0, i1 %b1, i1 %b2) { +entry: + br label %for.cond + +for.cond: + %i.0 = phi i16 [ 0, %entry ], [ %inc18, %for.inc17 ] + %idxprom = zext i16 %i.0 to i32 + br i1 %b2, label %for.end19, label %for.cond1 + +for.cond1: + %j.0 = phi i16 [ %inc15, %for.inc14 ], [ 0, %for.cond ] + br i1 %b1, label %for.inc17, label %for.cond5.preheader +for.cond5.preheader: + %idxprom10 = zext i16 %j.0 to i32 + br label %for.cond5 + +for.cond5: + %k.0 = phi i16 [ %inc, %for.inc ], [ 0, %for.cond5.preheader ] + br i1 %b0, label %for.inc14, label %for.inc + +for.inc: + %idxprom12 = zext i16 %k.0 to i32 + %arrayidx13 = getelementptr inbounds [2 x [4 x [18 x i32]]], ptr @data, i32 0, i32 %idxprom, i32 %idxprom10, i32 %idxprom12 + store i32 7, ptr %arrayidx13, align 1 + %inc = add nuw nsw i16 %k.0, 1 + br label %for.cond5 + +for.inc14: + %inc15 = add nuw nsw i16 %j.0, 1 + br label %for.cond1 + +for.inc17: + %inc18 = add nuw nsw i16 %i.0, 1 + br label %for.cond + +for.end19: + ret void +} ; Check IndexedReference::computeRefCost can handle negative stride
diff --git a/llvm/test/Analysis/LoopCacheAnalysis/compute-cost.ll b/llvm/test/Analysis/LoopCacheAnalysis/compute-cost.ll --- a/llvm/test/Analysis/LoopCacheAnalysis/compute-cost.ll +++ b/llvm/test/Analysis/LoopCacheAnalysis/compute-cost.ll @@ -35,7 +35,56 @@ ret void } +; Check IndexedReference::computeRefCost can handle type differences between +; Coeff and ElemSize. + +; SMALLER-CACHELINE: Loop 'for.cond' has cost = 100000000 +; SMALLER-CACHELINE: Loop 'for.cond1' has cost = 1000000 +; SMALLER-CACHELINE: Loop 'for.cond5' has cost = 120000 +; LARGER-CACHELINE: Loop 'for.cond' has cost = 100000000 +; LARGER-CACHELINE: Loop 'for.cond1' has cost = 1000000 +; LARGER-CACHELINE: Loop 'for.cond5' has cost = 10000 +@data = external dso_local global [2 x [4 x [18 x i32]]], align 1 + +define dso_local void @handle_to_ptr_2(i1 %b0, i1 %b1, i1 %b2) { +entry: + br label %for.cond + +for.cond: + %i.0 = phi i16 [ 0, %entry ], [ %inc18, %for.inc17 ] + %idxprom = zext i16 %i.0 to i32 + br i1 %b2, label %for.end19, label %for.cond1 + +for.cond1: + %j.0 = phi i16 [ %inc15, %for.inc14 ], [ 0, %for.cond ] + br i1 %b1, label %for.inc17, label %for.cond5.preheader + +for.cond5.preheader: + %idxprom10 = zext i16 %j.0 to i32 + br label %for.cond5 +for.cond5: + %k.0 = phi i16 [ %inc, %for.inc ], [ 0, %for.cond5.preheader ] + br i1 %b0, label %for.inc14, label %for.inc + +for.inc: + %idxprom12 = zext i16 %k.0 to i32 + %arrayidx13 = getelementptr inbounds [2 x [4 x [18 x i32]]], ptr @data, i32 0, i32 %idxprom, i32 %idxprom10, i32 %idxprom12 + store i32 7, ptr %arrayidx13, align 1 + %inc = add nuw nsw i16 %k.0, 1 + br label %for.cond5 + +for.inc14: + %inc15 = add nuw nsw i16 %j.0, 1 + br label %for.cond1 + +for.inc17: + %inc18 = add nuw nsw i16 %i.0, 1 + br label %for.cond + +for.end19: + ret void +} ; Check IndexedReference::computeRefCost can handle negative stride