diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1246,21 +1246,23 @@
       //      %v0 = shuffle %vec, undef, <0, 2, 4, 6>         ; Index 0
       // The cost is estimated as extract elements at 0, 2, 4, 6 from the
       // <8 x i32> vector and insert them into a <4 x i32> vector.
+      APInt ExtractedElts(NumElts, 0);
       for (unsigned Index : Indices) {
         assert(Index < Factor && "Invalid index for interleaved memory op");
 
         // Extract elements from loaded vector for each sub vector.
         for (unsigned Elm = 0; Elm < NumSubElts; Elm++)
-          Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VT,
-                                              Index + Elm * Factor);
+          ExtractedElts.setBit(Index + Elm * Factor);
       }
+      Cost += thisT()->getScalarizationOverhead(
+          VT, ExtractedElts, /*Insert=*/false, /*Extract=*/true);
 
-      InstructionCost InsSubCost = 0;
+      APInt InsertedElts(NumSubElts, 0);
       for (unsigned Elm = 0; Elm < NumSubElts; Elm++)
-        InsSubCost +=
-            thisT()->getVectorInstrCost(Instruction::InsertElement, SubVT, Elm);
-
-      Cost += Indices.size() * InsSubCost;
+        InsertedElts.setBit(Elm);
+      Cost += Indices.size() *
+              thisT()->getScalarizationOverhead(
+                  SubVT, InsertedElts, /*Insert=*/true, /*Extract=*/false);
     } else {
       // The interleave cost is extract elements from sub vectors, and
       // insert them into the wide vector.
@@ -1275,20 +1277,23 @@
       // The cost is estimated as extract all elements (of actual members,
       // excluding gaps) from both <4 x i32> vectors and insert into the <12 x
       // i32> vector.
-      InstructionCost ExtSubCost = 0;
+      APInt ExtractedElts(NumSubElts, 0);
       for (unsigned Elm = 0; Elm < NumSubElts; Elm++)
-        ExtSubCost += thisT()->getVectorInstrCost(Instruction::ExtractElement,
-                                                  SubVT, Elm);
-      Cost += ExtSubCost * Indices.size();
+        ExtractedElts.setBit(Elm);
+      Cost += Indices.size() *
+              thisT()->getScalarizationOverhead(
+                  SubVT, ExtractedElts, /*Insert=*/false, /*Extract=*/true);
 
+      APInt InsertedElts(NumElts, 0);
       for (unsigned Index : Indices) {
         assert(Index < Factor && "Invalid index for interleaved memory op");
 
-        // Insert elements from loaded vector for each sub vector.
+        // Record the wide-vector elements each sub vector is inserted into.
         for (unsigned Elm = 0; Elm < NumSubElts; Elm++)
-          Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VT,
-                                              Index + Elm * Factor);
+          InsertedElts.setBit(Index + Elm * Factor);
       }
+      Cost += thisT()->getScalarizationOverhead(
+          VT, InsertedElts, /*Insert=*/true, /*Extract=*/false);
     }
 
     if (!UseMaskForCond)
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll
@@ -27,7 +27,7 @@
 ; AVX2: LV: Found an estimated cost of 26 for VF 2 For instruction:   %v0 = load i16, i16* %in0, align 2
 ; AVX2: LV: Found an estimated cost of 50 for VF 4 For instruction:   %v0 = load i16, i16* %in0, align 2
 ; AVX2: LV: Found an estimated cost of 99 for VF 8 For instruction:   %v0 = load i16, i16* %in0, align 2
-; AVX2: LV: Found an estimated cost of 285 for VF 16 For instruction:   %v0 = load i16, i16* %in0, align 2
+; AVX2: LV: Found an estimated cost of 215 for VF 16 For instruction:   %v0 = load i16, i16* %in0, align 2
 ;
 ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction:   %v0 = load i16, i16* %in0, align 2
 ; AVX512: LV: Found an estimated cost of 11 for VF 2 For instruction:   %v0 = load i16, i16* %in0, align 2
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-2.ll
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-2.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-2.ll
@@ -13,17 +13,17 @@
 ; CHECK: LV: Checking a loop in "test"
 ;
 ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction:   %v0 = load i8, i8* %in0, align 1
-; SSE2: LV: Found an estimated cost of 20 for VF 2 For instruction:   %v0 = load i8, i8* %in0, align 1
-; SSE2: LV: Found an estimated cost of 56 for VF 4 For instruction:   %v0 = load i8, i8* %in0, align 1
-; SSE2: LV: Found an estimated cost of 160 for VF 8 For instruction:   %v0 = load i8, i8* %in0, align 1
-; SSE2: LV: Found an estimated cost of 478 for VF 16 For instruction:   %v0 = load i8, i8* %in0, align 1
+; SSE2: LV: Found an estimated cost of 14 for VF 2 For instruction:   %v0 = load i8, i8* %in0, align 1
+; SSE2: LV: Found an estimated cost of 30 for VF 4 For instruction:   %v0 = load i8, i8* %in0, align 1
+; SSE2: LV: Found an estimated cost of 62 for VF 8 For instruction:   %v0 = load i8, i8* %in0, align 1
+; SSE2: LV: Found an estimated cost of 126 for VF 16 For instruction:   %v0 = load i8, i8* %in0, align 1
 ;
 ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction:   %v0 = load i8, i8* %in0, align 1
 ; AVX1: LV: Found an estimated cost of 9 for VF 2 For instruction:   %v0 = load i8, i8* %in0, align 1
 ; AVX1: LV: Found an estimated cost of 17 for VF 4 For instruction:   %v0 = load i8, i8* %in0, align 1
 ; AVX1: LV: Found an estimated cost of 33 for VF 8 For instruction:   %v0 = load i8, i8* %in0, align 1
 ; AVX1: LV: Found an estimated cost of 81 for VF 16 For instruction:   %v0 = load i8, i8* %in0, align 1
-; AVX1: LV: Found an estimated cost of 226 for VF 32 For instruction:   %v0 = load i8, i8* %in0, align 1
+; AVX1: LV: Found an estimated cost of 166 for VF 32 For instruction:   %v0 = load i8, i8* %in0, align 1
 ;
 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction:   %v0 = load i8, i8* %in0, align 1
 ; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction:   %v0 = load i8, i8* %in0, align 1
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-2.ll
--- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-2.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-2.ll
@@ -13,18 +13,18 @@
 ; CHECK: LV: Checking a loop in "test"
 ;
 ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction:   store i8 %v1, i8* %out1, align 1
-; SSE2: LV: Found an estimated cost of 27 for VF 2 For instruction:   store i8 %v1, i8* %out1, align 1
-; SSE2: LV: Found an estimated cost of 79 for VF 4 For instruction:   store i8 %v1, i8* %out1, align 1
-; SSE2: LV: Found an estimated cost of 238 for VF 8 For instruction:   store i8 %v1, i8* %out1, align 1
-; SSE2: LV: Found an estimated cost of 478 for VF 16 For instruction:   store i8 %v1, i8* %out1, align 1
+; SSE2: LV: Found an estimated cost of 14 for VF 2 For instruction:   store i8 %v1, i8* %out1, align 1
+; SSE2: LV: Found an estimated cost of 30 for VF 4 For instruction:   store i8 %v1, i8* %out1, align 1
+; SSE2: LV: Found an estimated cost of 62 for VF 8 For instruction:   store i8 %v1, i8* %out1, align 1
+; SSE2: LV: Found an estimated cost of 126 for VF 16 For instruction:   store i8 %v1, i8* %out1, align 1
 ;
 ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction:   store i8 %v1, i8* %out1, align 1
 ; AVX1: LV: Found an estimated cost of 9 for VF 2 For instruction:   store i8 %v1, i8* %out1, align 1
 ; AVX1: LV: Found an estimated cost of 17 for VF 4 For instruction:   store i8 %v1, i8* %out1, align 1
 ; AVX1: LV: Found an estimated cost of 33 for VF 8 For instruction:   store i8 %v1, i8* %out1, align 1
-; AVX1: LV: Found an estimated cost of 97 for VF 16 For instruction:   store i8 %v1, i8* %out1, align 1
-; AVX1: LV: Found an estimated cost of 226 for VF 32 For instruction:   store i8 %v1, i8* %out1, align 1
-
+; AVX1: LV: Found an estimated cost of 67 for VF 16 For instruction:   store i8 %v1, i8* %out1, align 1
+; AVX1: LV: Found an estimated cost of 166 for VF 32 For instruction:   store i8 %v1, i8* %out1, align 1
+;
 ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction:   store i8 %v1, i8* %out1, align 1
 ; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction:   store i8 %v1, i8* %out1, align 1
 ; AVX2: LV: Found an estimated cost of 2 for VF 4 For instruction:   store i8 %v1, i8* %out1, align 1