Index: include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfo.h
+++ include/llvm/Analysis/TargetTransformInfo.h
@@ -23,6 +23,8 @@
 #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
 
 #include "llvm/ADT/Optional.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Operator.h"
@@ -601,14 +603,36 @@
   /// split during legalization. Zero is returned when the answer is unknown.
   unsigned getNumberOfParts(Type *Tp) const;
 
+  /// \brief Provides memory access (pointer stride) information for
+  /// evaluating the cost of an address computation.
+  struct AddressAccessInfo {
+    /// True if the access is strided (an add recurrence), false otherwise.
+    bool isStrided;
+    /// The step of the stride recurrence, or null if unknown.
+    const SCEV *Step;
+
+    AddressAccessInfo() : isStrided(false), Step(nullptr) {}
+
+    bool isStridedAccess() const { return isStrided; }
+
+    bool isConstantStridedAccess() const {
+      return isStrided && isa<SCEVConstant>(Step);
+    }
+
+    /// Returns true if the access has a constant stride known to be smaller
+    /// than MergeDistance.
+    bool isConstantStridedAccessLessThan(int64_t MergeDistance) const {
+      if (!isConstantStridedAccess())
+        return false;
+      APInt StrideVal = cast<SCEVConstant>(Step)->getAPInt();
+      if (StrideVal.getBitWidth() > 64)
+        return false;
+      return StrideVal.getSExtValue() < MergeDistance;
+    }
+  };
+
   /// \returns The cost of the address computation. For most targets this can
   /// be merged into the instruction indexing mode. Some targets might want to
   /// distinguish between address computation for memory operations on vector
   /// types and scalar types. Such targets should override this function.
-  /// The 'IsComplex' parameter is a hint that the address computation is
-  /// likely to involve multiple instructions and as such unlikely to be
-  /// merged into the address indexing mode.
-  int getAddressComputationCost(Type *Ty, bool IsComplex = false) const;
+  /// The 'AddressInfo' parameter describes the address access, so the target
+  /// can decide whether it requires extra computation or is likely to be
+  /// merged into the addressing indexing modes.
+  int getAddressComputationCost(
+      Type *Ty, AddressAccessInfo *AddressInfo = nullptr) const;
 
   /// \returns The cost, if any, of keeping values of the given types alive
   /// over a callsite.
@@ -787,7 +811,8 @@
   virtual int getCallInstrCost(Function *F, Type *RetTy,
                                ArrayRef<Type *> Tys) = 0;
   virtual unsigned getNumberOfParts(Type *Tp) = 0;
-  virtual int getAddressComputationCost(Type *Ty, bool IsComplex) = 0;
+  virtual int getAddressComputationCost(Type *Ty,
+                                        AddressAccessInfo *AddressInfo) = 0;
   virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
   virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                                   MemIntrinsicInfo &Info) = 0;
@@ -1036,8 +1061,9 @@
   unsigned getNumberOfParts(Type *Tp) override {
     return Impl.getNumberOfParts(Tp);
   }
-  int getAddressComputationCost(Type *Ty, bool IsComplex) override {
-    return Impl.getAddressComputationCost(Ty, IsComplex);
+  int getAddressComputationCost(Type *Ty,
+                                AddressAccessInfo *AddressInfo) override {
+    return Impl.getAddressComputationCost(Ty, AddressInfo);
   }
   unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
     return Impl.getCostOfKeepingLiveOverCall(Tys);
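Aside (illustrative sketch, not part of the patch): a target override consuming
the new hint could look like the following, where TargetFooTTIImpl and its cost
constants are hypothetical:

    // Sketch only; TargetFooTTIImpl and the cost values are made up.
    int TargetFooTTIImpl::getAddressComputationCost(
        Type *Ty, TTI::AddressAccessInfo *AddressInfo) {
      // Without access info, conservatively assume the address folds away.
      if (!Ty->isVectorTy() || !AddressInfo)
        return 0;
      // A small constant stride is expected to fold into the indexing mode.
      if (AddressInfo->isConstantStridedAccessLessThan(65))
        return 0;
      // Otherwise charge for the extra address arithmetic.
      return 10;
    }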
Index: include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfoImpl.h
+++ include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -370,7 +370,9 @@
   unsigned getNumberOfParts(Type *Tp) { return 0; }
 
-  unsigned getAddressComputationCost(Type *Tp, bool) { return 0; }
+  unsigned getAddressComputationCost(Type *Tp, TTI::AddressAccessInfo *) {
+    return 0;
+  }
 
   unsigned getReductionCost(unsigned, Type *, bool) { return 1; }
Index: include/llvm/CodeGen/BasicTTIImpl.h
===================================================================
--- include/llvm/CodeGen/BasicTTIImpl.h
+++ include/llvm/CodeGen/BasicTTIImpl.h
@@ -923,7 +923,9 @@
     return LT.first;
   }
 
-  unsigned getAddressComputationCost(Type *Ty, bool IsComplex) { return 0; }
+  unsigned getAddressComputationCost(Type *Ty, TTI::AddressAccessInfo *) {
+    return 0;
+  }
 
   unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise) {
     assert(Ty->isVectorTy() && "Expect a vector type");
Index: lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- lib/Analysis/TargetTransformInfo.cpp
+++ lib/Analysis/TargetTransformInfo.cpp
@@ -389,8 +389,8 @@
 }
 
 int TargetTransformInfo::getAddressComputationCost(Type *Tp,
-                                                   bool IsComplex) const {
-  int Cost = TTIImpl->getAddressComputationCost(Tp, IsComplex);
+                                      AddressAccessInfo *AddressInfo) const {
+  int Cost = TTIImpl->getAddressComputationCost(Tp, AddressInfo);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
Index: lib/Target/AArch64/AArch64TargetTransformInfo.h
===================================================================
--- lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -104,7 +104,7 @@
       TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
       TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
 
-  int getAddressComputationCost(Type *Ty, bool IsComplex);
+  int getAddressComputationCost(Type *Ty, TTI::AddressAccessInfo *AddressInfo);
 
   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
Index: lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -417,14 +417,18 @@
   }
 }
 
-int AArch64TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
+int AArch64TTIImpl::getAddressComputationCost(Type *Ty,
+                                          TTI::AddressAccessInfo *AddressInfo) {
   // Address computations in vectorized code with non-consecutive addresses will
   // likely result in more instructions compared to scalar code where the
   // computation can more often be merged into the index mode. The resulting
   // extra micro-ops can significantly decrease throughput.
   unsigned NumVectorInstToHideOverhead = 10;
+  int MaxMergeDistance = 64;
 
-  if (Ty->isVectorTy() && IsComplex)
+  if (Ty->isVectorTy() && AddressInfo &&
+      !AddressInfo->isConstantStridedAccessLessThan(MaxMergeDistance + 1))
     return NumVectorInstToHideOverhead;
 
   // In many cases the address computation is not merged into the instruction
Index: lib/Target/ARM/ARMTargetTransformInfo.h
===================================================================
--- lib/Target/ARM/ARMTargetTransformInfo.h
+++ lib/Target/ARM/ARMTargetTransformInfo.h
@@ -104,7 +104,7 @@
   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
 
-  int getAddressComputationCost(Type *Val, bool IsComplex);
+  int getAddressComputationCost(Type *Val, TTI::AddressAccessInfo *AddressInfo);
 
   int getFPOpCost(Type *Ty);
Index: lib/Target/ARM/ARMTargetTransformInfo.cpp
===================================================================
--- lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -338,14 +338,18 @@
   return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
 }
 
-int ARMTTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
+int ARMTTIImpl::getAddressComputationCost(Type *Ty,
+                                          TTI::AddressAccessInfo *AddressInfo) {
   // Address computations in vectorized code with non-consecutive addresses will
   // likely result in more instructions compared to scalar code where the
   // computation can more often be merged into the index mode. The resulting
   // extra micro-ops can significantly decrease throughput.
   unsigned NumVectorInstToHideOverhead = 10;
+  int MaxMergeDistance = 64;
 
-  if (Ty->isVectorTy() && IsComplex)
+  if (Ty->isVectorTy() && AddressInfo &&
+      !AddressInfo->isConstantStridedAccessLessThan(MaxMergeDistance + 1))
     return NumVectorInstToHideOverhead;
 
   // In many cases the address computation is not merged into the instruction
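Aside (illustrative sketch, not part of the patch): both backends pass
MaxMergeDistance + 1 because the predicate uses a strict less-than, so a stride
of exactly 64 bytes still counts as mergeable. A standalone program showing
that boundary, with plain integers standing in for the SCEV step:

    #include <cassert>
    #include <cstdint>

    // Mirrors isConstantStridedAccessLessThan for a known constant stride.
    static bool constantStrideLessThan(int64_t StrideVal,
                                       int64_t MergeDistance) {
      return StrideVal < MergeDistance;
    }

    int main() {
      const int64_t MaxMergeDistance = 64;
      // Stride == 64 passes, so no vector overhead is charged...
      assert(constantStrideLessThan(64, MaxMergeDistance + 1));
      // ...while stride == 65 fails and gets NumVectorInstToHideOverhead.
      assert(!constantStrideLessThan(65, MaxMergeDistance + 1));
      return 0;
    }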
Index: lib/Target/X86/X86TargetTransformInfo.h
===================================================================
--- lib/Target/X86/X86TargetTransformInfo.h
+++ lib/Target/X86/X86TargetTransformInfo.h
@@ -71,7 +71,8 @@
                       unsigned AddressSpace);
   int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
                              bool VariableMask, unsigned Alignment);
-  int getAddressComputationCost(Type *PtrTy, bool IsComplex);
+  int getAddressComputationCost(Type *PtrTy,
+                                TTI::AddressAccessInfo *AddressInfo);
   int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
                             ArrayRef<Type *> Tys, FastMathFlags FMF);
Index: lib/Target/X86/X86TargetTransformInfo.cpp
===================================================================
--- lib/Target/X86/X86TargetTransformInfo.cpp
+++ lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1563,17 +1563,29 @@
   return Cost+LT.first;
 }
 
-int X86TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
+int X86TTIImpl::getAddressComputationCost(Type *Ty,
+                                          TTI::AddressAccessInfo *AddressInfo) {
   // Address computations in vectorized code with non-consecutive addresses will
   // likely result in more instructions compared to scalar code where the
   // computation can more often be merged into the index mode. The resulting
   // extra micro-ops can significantly decrease throughput.
   unsigned NumVectorInstToHideOverhead = 10;
 
-  if (Ty->isVectorTy() && IsComplex)
-    return NumVectorInstToHideOverhead;
+  // The cost of a constant-strided access computation is hidden by X86's
+  // addressing modes regardless of the stride value, so we do not distinguish
+  // between a constant stride in general and one that is less than or equal
+  // to 64. Even for a (loop-invariant) stride whose value is not known at
+  // compile time, the address computation will not incur more than one extra
+  // ADD instruction.
+  if (Ty->isVectorTy() && AddressInfo) {
+    if (!AddressInfo->isStridedAccess())
+      return NumVectorInstToHideOverhead;
+    if (!AddressInfo->isConstantStridedAccess())
+      return 1;
+  }
 
-  return BaseT::getAddressComputationCost(Ty, IsComplex);
+  return BaseT::getAddressComputationCost(Ty, AddressInfo);
 }
 
 int X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy,
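Aside (illustrative sketch, not part of the patch): for vector types the X86
logic above reduces to a three-way decision. A self-contained distillation,
with booleans standing in for Ty->isVectorTy() and the AddressAccessInfo
queries:

    // Hypothetical distillation of X86TTIImpl::getAddressComputationCost.
    int x86AddrCostSketch(bool IsVectorTy, bool HasInfo, bool IsStrided,
                          bool IsConstantStride) {
      const int NumVectorInstToHideOverhead = 10;
      if (IsVectorTy && HasInfo) {
        if (!IsStrided)
          return NumVectorInstToHideOverhead; // scattered addresses
        if (!IsConstantStride)
          return 1; // loop-invariant but unknown stride: one extra ADD
      }
      return 0; // constant stride folds into the addressing mode (base cost)
    }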
Index: lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorize.cpp
+++ lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6774,22 +6774,21 @@
   return Cost;
 }
 
-/// \brief Check whether the address computation for a non-consecutive memory
-/// access looks like an unlikely candidate for being merged into the indexing
-/// mode.
-///
-/// We look for a GEP which has one index that is an induction variable and all
-/// other indices are loop invariant. If the stride of this access is also
-/// within a small bound we decide that this address computation can likely be
-/// merged into the addressing mode.
-/// In all other cases, we identify the address computation as complex.
-static bool isLikelyComplexAddressComputation(Value *Ptr,
-                                              LoopVectorizationLegality *Legal,
-                                              ScalarEvolution *SE,
-                                              const Loop *TheLoop) {
+/// \brief Collect information about the complexity of an address access:
+/// whether it is strided, and the stride's step recurrence if it is.
+///
+/// This info can be passed to the target to estimate the cost of the address
+/// computation.
+static TargetTransformInfo::AddressAccessInfo
+getAddressAccessComplexity(Value *Ptr, LoopVectorizationLegality *Legal,
+                           ScalarEvolution *SE, const Loop *TheLoop) {
+  TargetTransformInfo::AddressAccessInfo AddressInfo;
+
   auto *Gep = dyn_cast<GetElementPtrInst>(Ptr);
   if (!Gep)
-    return true;
+    return AddressInfo;
 
   // We are looking for a gep with all loop invariant indices except for one
   // which should be an induction variable.
@@ -6798,33 +6797,21 @@
     Value *Opd = Gep->getOperand(i);
     if (!SE->isLoopInvariant(SE->getSCEV(Opd), TheLoop) &&
         !Legal->isInductionVariable(Opd))
-      return true;
+      return AddressInfo;
   }
 
   // Now we know we have a GEP ptr, %inv, %ind, %inv. Make sure that the step
   // can likely be merged into the address computation.
-  unsigned MaxMergeDistance = 64;
-
   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Ptr));
   if (!AddRec)
-    return true;
-
-  // Check the step is constant.
-  const SCEV *Step = AddRec->getStepRecurrence(*SE);
-
-  // Calculate the pointer stride and check if it is consecutive.
-  const auto *C = dyn_cast<SCEVConstant>(Step);
-  if (!C)
-    return true;
-
-  const APInt &APStepVal = C->getAPInt();
-
-  // Huge step value - give up.
-  if (APStepVal.getBitWidth() > 64)
-    return true;
+    return AddressInfo;
 
-  int64_t StepVal = APStepVal.getSExtValue();
+  AddressInfo.isStrided = true;
 
-  return StepVal > MaxMergeDistance;
+  // Record the step recurrence; targets can query whether it is constant.
+  AddressInfo.Step = AddRec->getStepRecurrence(*SE);
+  return AddressInfo;
 }
 
 static bool isStrideMul(Instruction *I, LoopVectorizationLegality *Legal) {
@@ -7052,12 +7039,13 @@
   unsigned Cost = 0;
   Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
 
-  // True if the memory instruction's address computation is complex.
-  bool IsComplexComputation =
-      isLikelyComplexAddressComputation(Ptr, Legal, SE, TheLoop);
+  // Figure out whether the access is strided, and get the stride value if it
+  // is known at compile time.
+  TargetTransformInfo::AddressAccessInfo AddressInfo =
+      getAddressAccessComplexity(Ptr, Legal, SE, TheLoop);
 
   // Get the cost of the scalar memory instruction and address computation.
-  Cost += VF * TTI.getAddressComputationCost(PtrTy, IsComplexComputation);
+  Cost += VF * TTI.getAddressComputationCost(PtrTy, &AddressInfo);
   Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
                                    Alignment, AS);
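Aside (illustrative sketch, not part of the patch): since the new parameter
defaults to nullptr, existing callers keep the old conservative behavior;
callers with stride information pass the struct by address, as the vectorizer
now does. A hedged sketch, assuming TTI, PtrTy, and VF are in scope as in the
hunk above:

    // Without access information (legacy callers): defaulted nullptr.
    unsigned CostNoInfo = TTI.getAddressComputationCost(PtrTy);

    // With access information, as in the vectorizer above; a
    // default-constructed info reports a non-strided access.
    TargetTransformInfo::AddressAccessInfo AddressInfo;
    unsigned CostWithInfo =
        VF * TTI.getAddressComputationCost(PtrTy, &AddressInfo);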