Index: ../../ver4/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- ../../ver4/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ ../../ver4/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1030,6 +1030,18 @@
     return InterleaveGroupMap.count(Instr);
   }
 
+  /// \brief Check if \p Instr belongs to any interleave group but Gather
+  /// variant should be used for vectorization with certain \p VF.
+  bool isInterleavedAndProfitable(Instruction *Instr, unsigned VF) const {
+    return InterleaveGroupMap.count(Instr) &&
+      !PreferGatherSet.count(std::make_pair(Instr, VF));
+  }
+
+  /// \brief Prefer Gather vs Interleave for certain \p VF.
+  void preferGather(Instruction *Instr, unsigned VF) {
+    PreferGatherSet.insert(std::make_pair(Instr, VF));
+  }
+
   /// \brief Return the maximum interleave factor of all interleaved groups.
   unsigned getMaxInterleaveFactor() const {
     unsigned MaxFactor = 1;
@@ -1073,6 +1085,10 @@
   /// Holds the relationships between the members and the interleave group.
   DenseMap<Instruction *, InterleaveGroup *> InterleaveGroupMap;
 
+  /// Holds list of instructions that should be vectorized to "gather" for
+  /// a certain VF.
+  SmallSet<std::pair<Instruction *, unsigned>, 4> PreferGatherSet;
+
   /// Holds dependences among the memory accesses in the loop. It maps a source
   /// access to a set of dependent sink accesses.
   DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;
@@ -1618,6 +1634,17 @@
     return InterleaveInfo.isInterleaved(Instr);
   }
 
+  /// \brief Check if \p Instr belongs to any interleaved access group
+  /// and interleaving is profitable under certain \p VF.
+  bool isAccessInterleavedAndProfitable(Instruction *Instr, unsigned VF) {
+    return InterleaveInfo.isInterleavedAndProfitable(Instr, VF);
+  }
+
+  /// \brief Prefer Gather vs Interleave for certain \p VF.
+  void preferGather(Instruction *Instr, unsigned VF) {
+    return InterleaveInfo.preferGather(Instr, VF);
+  }
+
   /// \brief Return the maximum interleave factor of all interleaved groups.
   unsigned getMaxInterleaveFactor() const {
     return InterleaveInfo.getMaxInterleaveFactor();
@@ -2811,8 +2838,9 @@
 
   assert((LI || SI) && "Invalid Load/Store instruction");
 
-  // Try to vectorize the interleave group if this access is interleaved.
-  if (Legal->isAccessInterleaved(Instr))
+  // Try to vectorize the interleave group if this access is interleaved
+  // and this kind of vectorization is profitable.
+  if (Legal->isAccessInterleavedAndProfitable(Instr, VF))
     return vectorizeInterleaveGroup(Instr);
 
   Type *ScalarDataTy = LI ? LI->getType() : SI->getValueOperand()->getType();
@@ -7012,12 +7040,26 @@
     if (Legal->isAccessInterleaved(I)) {
       auto Group = Legal->getInterleavedAccessGroup(I);
       assert(Group && "Fail to get an interleaved access group.");
+      unsigned InterleaveFactor = Group->getFactor();
 
+      // Instructions may be combined in "interleaved access" groups, but the
+      // "gather" operation for each of them may be cheaper.
+      // I do not compare "gather" cost vs "interleave pattern", I just assume
+      // that each target provides reasonable MaxInterleaveFactor that
+      // makes the "interleave pattern" profitable. When InterleaveFactor
+      // exceeds the maximum provided by TTI, the Gather, if applicable,
+      // becomes better.
+      if (InterleaveFactor > TTI.getMaxInterleaveFactor(VF) &&
+          Legal->isLegalGatherOrScatter(I)) {
+        Legal->preferGather(I, VF);
+        return TTI.getAddressComputationCost(VectorTy) + 
+          TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr,
+                                     Legal->isMaskRequired(I), Alignment);
+      }
       // Only calculate the cost once at the insert position.
       if (Group->getInsertPos() != I)
         return 0;
 
-      unsigned InterleaveFactor = Group->getFactor();
       Type *WideVecTy =
           VectorType::get(VectorTy->getVectorElementType(),
                           VectorTy->getVectorNumElements() * InterleaveFactor);
Index: ../../ver4/test/Transforms/LoopVectorize/X86/gather-vs-interleave.ll
===================================================================
--- ../../ver4/test/Transforms/LoopVectorize/X86/gather-vs-interleave.ll
+++ ../../ver4/test/Transforms/LoopVectorize/X86/gather-vs-interleave.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -O2 -S
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = global [10240 x i32] zeroinitializer, align 16
+@B = global [10240 x i32] zeroinitializer, align 16
+
+; Source code:
+; void foo() {
+;   for (int i=0; i<N; i++)
+;     B[i] = A[i*5];
+; }
+; Interleave pattern is not profitable for stride 5.
+; In this case we should use "gather"
+
+;CHECK-LABEL: @foo
+;CHECK: llvm.masked.gather.v16i32
+;CHECK: ret void
+
+define void @foo() local_unnamed_addr #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %0 = mul nuw nsw i64 %indvars.iv, 5
+  %arrayidx = getelementptr inbounds [10240 x i32], [10240 x i32]* @A, i64 0, i64 %0
+  %1 = load i32, i32* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds [10240 x i32], [10240 x i32]* @B, i64 0, i64 %indvars.iv
+  store i32 %1, i32* %arrayidx2, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="skx" "target-features"="+adx,+aes,+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+clflushopt,+clwb,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+mpx,+pclmul,+pcommit,+pku,+popcnt,+rdrnd,+rdseed,+rtm,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "unsafe-fp-math"="false" "use-soft-float"="false" }
+