diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -322,9 +322,11 @@
   /// Estimate the cost of a chain of pointers (typically pointer operands of a
   /// chain of loads or stores within same block) operations set when lowered.
+  /// \p AccessTy is the type of the loads/stores that will ultimately use the
+  /// \p Ptrs.
   InstructionCost
   getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
-                       const PointersChainInfo &Info,
+                       const PointersChainInfo &Info, Type *AccessTy,
                        TargetCostKind CostKind = TTI::TCK_RecipThroughput
   ) const;
@@ -1665,7 +1667,7 @@
                                      TTI::TargetCostKind CostKind) = 0;
   virtual InstructionCost
   getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
-                       const TTI::PointersChainInfo &Info,
+                       const TTI::PointersChainInfo &Info, Type *AccessTy,
                        TTI::TargetCostKind CostKind) = 0;
   virtual unsigned getInliningThresholdMultiplier() = 0;
   virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
@@ -2026,8 +2028,9 @@
   InstructionCost
   getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
                        const PointersChainInfo &Info,
+                       Type *AccessTy,
                        TargetCostKind CostKind) override {
-    return Impl.getPointersChainCost(Ptrs, Base, Info, CostKind);
+    return Impl.getPointersChainCost(Ptrs, Base, Info, AccessTy, CostKind);
   }
   unsigned getInliningThresholdMultiplier() override {
     return Impl.getInliningThresholdMultiplier();
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1041,6 +1041,7 @@
   InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
                                        const Value *Base,
                                        const TTI::PointersChainInfo &Info,
+                                       Type *AccessTy,
                                        TTI::TargetCostKind CostKind) {
     InstructionCost Cost = TTI::TCC_Free;
     // In the basic model we take into account GEP instructions only
@@ -1050,15 +1051,28 @@
     // pointers are relative to the same base address, all the rest are
     // either GEP instructions, PHIs, bitcasts or constants. When we have same
     // base, we just calculate cost of each non-Base GEP as an ADD operation if
-    // any their index is a non-const.
+    // any of their indices isn't foldable.
     // If no known dependecies between the pointers cost is calculated as a sum
     // of costs of GEP instructions.
-    for (const Value *V : Ptrs) {
+    for (auto [I, V] : enumerate(Ptrs)) {
      const auto *GEP = dyn_cast<GetElementPtrInst>(V);
      if (!GEP)
        continue;
      if (Info.isSameBase() && V != Base) {
-        if (GEP->hasAllConstantIndices())
+        if (GEP->hasAllConstantIndices())
+          continue;
+        // If the chain is unit-stride and BaseReg + stride*i is a legal
+        // addressing mode, then presume the base GEP is sitting around in a
+        // register somewhere and check if we can fold the offset relative to
+        // it.
+        unsigned Stride = DL.getTypeStoreSize(AccessTy);
+        if (Info.isUnitStride() &&
+            static_cast<T *>(this)->isLegalAddressingMode(
+                AccessTy,
+                /* BaseGV */ nullptr,
+                /* BaseOffset */ Stride * I,
+                /* HasBaseReg */ true,
+                /* Scale */ 0, GEP->getType()->getPointerAddressSpace()))
          continue;
        Cost += static_cast<T *>(this)->getArithmeticInstrCost(
            Instruction::Add, GEP->getType(), CostKind,
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -230,10 +230,11 @@
 InstructionCost TargetTransformInfo::getPointersChainCost(
     ArrayRef<const Value *> Ptrs, const Value *Base,
-    const TTI::PointersChainInfo &Info, TTI::TargetCostKind CostKind) const {
+    const TTI::PointersChainInfo &Info, Type *AccessTy,
+    TTI::TargetCostKind CostKind) const {
   assert((Base || !Info.isSameBase()) &&
          "If pointers have same base address it has to be provided.");
-  return TTIImpl->getPointersChainCost(Ptrs, Base, Info, CostKind);
+  return TTIImpl->getPointersChainCost(Ptrs, Base, Info, AccessTy, CostKind);
 }

 unsigned TargetTransformInfo::getEstimatedNumberOfCaseClusters(
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -180,6 +180,7 @@
   InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
                                        const Value *Base,
                                        const TTI::PointersChainInfo &Info,
+                                       Type *AccessTy,
                                        TTI::TargetCostKind CostKind);
   InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
                                             const SCEV *Ptr);
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -4943,9 +4943,11 @@
   return Cost + LT.first;
 }

-InstructionCost X86TTIImpl::getPointersChainCost(
-    ArrayRef<const Value *> Ptrs, const Value *Base,
-    const TTI::PointersChainInfo &Info, TTI::TargetCostKind CostKind) {
+InstructionCost
+X86TTIImpl::getPointersChainCost(ArrayRef<const Value *> Ptrs,
+                                 const Value *Base,
+                                 const TTI::PointersChainInfo &Info,
+                                 Type *AccessTy, TTI::TargetCostKind CostKind) {
   if (Info.isSameBase() && Info.isKnownStride()) {
     // If all the pointers have known stride all the differences are translated
     // into constants. X86 memory addressing allows encoding it into
@@ -4957,7 +4959,7 @@
     }
     return TTI::TCC_Free;
   }
-  return BaseT::getPointersChainCost(Ptrs, Base, Info, CostKind);
+  return BaseT::getPointersChainCost(Ptrs, Base, Info, AccessTy, CostKind);
 }

 InstructionCost X86TTIImpl::getAddressComputationCost(Type *Ty,
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7264,7 +7264,8 @@
       // stay in vectorized code due to uses outside of these scalar
       // loads/stores.
       ScalarCost = TTI->getPointersChainCost(
-          Ptrs, BasePtr, TTI::PointersChainInfo::getUnitStride(), CostKind);
+          Ptrs, BasePtr, TTI::PointersChainInfo::getUnitStride(), ScalarTy,
+          CostKind);

       SmallVector<const Value *> PtrsRetainedInVecCode;
       for (Value *V : Ptrs) {
@@ -7290,7 +7291,7 @@
       }
       VecCost = TTI->getPointersChainCost(
           PtrsRetainedInVecCode, BasePtr,
-          TTI::PointersChainInfo::getKnownStride(), CostKind);
+          TTI::PointersChainInfo::getKnownStride(), VecTy, CostKind);
     } else {
       // Case 1: Ptrs are the arguments of loads that we are going to transform
       // into masked gather load intrinsic.
@@ -7306,7 +7307,8 @@
               ? TTI::PointersChainInfo::getUnknownStride()
               : TTI::PointersChainInfo::getKnownStride();
-      ScalarCost = TTI->getPointersChainCost(Ptrs, BasePtr, PtrsInfo, CostKind);
+      ScalarCost = TTI->getPointersChainCost(Ptrs, BasePtr, PtrsInfo, ScalarTy,
+                                             CostKind);

       // Remark: it not quite correct to use scalar GEP cost for a vector GEP,
       // but it's not clear how to do that without having vector GEP arguments
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/getpointerschaincost.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/getpointerschaincost.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/getpointerschaincost.ll
@@ -0,0 +1,67 @@
+; RUN: opt -mtriple=riscv64 -mattr=+v -riscv-v-slp-max-vf=0 -passes=slp-vectorizer -disable-output -debug-only=SLP < %s 2>&1 | FileCheck %s
+
+; Because all of these addresses are foldable, the scalar cost should be 0 when
+; computing the pointers chain cost.
+define void @f(ptr %dest, i64 %i) {
+; CHECK-LABEL: SLP: Analyzing blocks in f.
+; CHECK: SLP: Calculated GEPs cost for Tree:
+; CHECK: SLP: Costs:
+; CHECK: SLP: ReuseShuffleCost = 0
+; CHECK-NEXT: SLP: VectorCost = 0
+; CHECK-NEXT: SLP: ScalarCost = 0
+; CHECK-NEXT: SLP: ReuseShuffleCost + VecCost - ScalarCost = 0
+entry:
+  %p1 = getelementptr i32, ptr %dest, i32 0
+  store i32 1, ptr %p1
+  %p2 = getelementptr i32, ptr %dest, i32 1
+  store i32 1, ptr %p2
+  %p3 = getelementptr i32, ptr %dest, i32 2
+  store i32 1, ptr %p3
+  %p4 = getelementptr i32, ptr %dest, i32 3
+  store i32 1, ptr %p4
+  ret void
+}
+
+; When computing the scalar pointers chain cost here, there is a cost of 1 for
+; the base pointer, and the rest can be folded in, so the scalar cost should be
+; 1.
+define void @g(ptr %dest, i64 %i) {
+; CHECK-LABEL: SLP: Analyzing blocks in g.
+; CHECK: SLP: Calculated GEPs cost for Tree:
+; CHECK: SLP: Costs:
+; CHECK-NEXT: SLP: ReuseShuffleCost = 0
+; CHECK-NEXT: SLP: VectorCost = 1
+; CHECK-NEXT: SLP: ScalarCost = 1
+entry:
+  %p1 = getelementptr i32, ptr %dest, i32 2048
+  store i32 1, ptr %p1
+  %p2 = getelementptr i32, ptr %dest, i32 2049
+  store i32 1, ptr %p2
+  %p3 = getelementptr i32, ptr %dest, i32 2050
+  store i32 1, ptr %p3
+  %p4 = getelementptr i32, ptr %dest, i32 2051
+  store i32 1, ptr %p4
+  ret void
+}
+
+; When computing the scalar pointers chain cost here, there is a cost of 1 for
+; the base pointer, and the rest can be folded in, so the scalar cost should be
+; 1.
+define void @h(ptr %dest, i32 %i) {
+; CHECK-LABEL: SLP: Analyzing blocks in h.
+; CHECK: SLP: Calculated GEPs cost for Tree:
+; CHECK: SLP: Costs:
+; CHECK-NEXT: SLP: ReuseShuffleCost = 0
+; CHECK-NEXT: SLP: VectorCost = 1
+; CHECK-NEXT: SLP: ScalarCost = 1
+entry:
+  %p1 = getelementptr [4 x i32], ptr %dest, i32 %i, i32 0
+  store i32 1, ptr %p1
+  %p2 = getelementptr [4 x i32], ptr %dest, i32 %i, i32 1
+  store i32 1, ptr %p2
+  %p3 = getelementptr [4 x i32], ptr %dest, i32 %i, i32 2
+  store i32 1, ptr %p3
+  %p4 = getelementptr [4 x i32], ptr %dest, i32 %i, i32 3
+  store i32 1, ptr %p4
+  ret void
+}
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/struct-gep.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/struct-gep.ll
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/struct-gep.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/struct-gep.ll
@@ -2,7 +2,8 @@
 ; RUN: opt < %s -passes=slp-vectorizer -mtriple=riscv64 -mattr=+v \
 ; RUN:   -riscv-v-slp-max-vf=0 -S | FileCheck %s

-; FIXME: This should not be vectorized
+; This shouldn't be vectorized as the extra address computation required for the
+; vector instructions makes it unprofitable.

 %struct.2i32 = type { i32, i32 }

@@ -10,7 +11,9 @@
 ; CHECK-LABEL: @splat_store_v2i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[P1:%.*]] = getelementptr [[STRUCT_2I32:%.*]], ptr [[DEST:%.*]], i64 [[I:%.*]], i32 0
-; CHECK-NEXT:    store <2 x i32> <i32 1, i32 1>, ptr [[P1]], align 4
+; CHECK-NEXT:    store i32 1, ptr [[P1]], align 4
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr [[STRUCT_2I32]], ptr [[DEST]], i64 [[I]], i32 1
+; CHECK-NEXT:    store i32 1, ptr [[P2]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry: