Index: llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -412,6 +412,35 @@
       return 0;
 
     return Cost;
+
+  } else if (Val->getScalarType()->isIntegerTy() && Index != -1U) {
+    if (ST->hasP9Altivec()) {
+      if (ISD == ISD::INSERT_VECTOR_ELT)
+        // A move-to VSR and a permute/insert.  Assume vector operation cost
+        // for both (vectorCostAdjustment doubles this on P9).
+        return vectorCostAdjustment(2, Opcode, Val, nullptr);
+
+      // It's an extract.  Maybe we can do a cheap move-from VSR.
+      unsigned EltSize = Val->getScalarSizeInBits();
+      if (EltSize == 64) {
+        unsigned MfvsrdIndex = ST->isLittleEndian() ? 1 : 0;
+        if (Index == MfvsrdIndex)
+          return 1;
+      } else if (EltSize == 32) {
+        unsigned MfvsrwzIndex = ST->isLittleEndian() ? 2 : 1;
+        if (Index == MfvsrwzIndex)
+          return 1;
+      }
+
+      // We need a vector extract (or mfvsrld).  Assume vector operation cost.
+      // The cost of the load constant for a vector extract is disregarded
+      // (invariant, easily schedulable).
+      return vectorCostAdjustment(1, Opcode, Val, nullptr);
+
+    } else if (ST->hasDirectMove())
+      // Assume permute has standard cost.
+      // Assume move-to/move-from VSR have 2x standard cost.
+      return 3;
   }
 
   // Estimated cost of a load-hit-store delay.  This was obtained
Index: llvm/test/Analysis/CostModel/PowerPC/p9.ll
===================================================================
--- llvm/test/Analysis/CostModel/PowerPC/p9.ll
+++ llvm/test/Analysis/CostModel/PowerPC/p9.ll
@@ -1,4 +1,5 @@
 ; RUN: opt < %s -cost-model -analyze -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx | FileCheck %s
+; RUN: opt < %s -cost-model -analyze -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=+vsx | FileCheck --check-prefix=CHECK-P8 %s
 ; RUN: opt < %s -cost-model -analyze -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr9 -mattr=+vsx | FileCheck --check-prefix=CHECK-P9 %s
 ; RUN: opt < %s -cost-model -analyze -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -mattr=+vsx | FileCheck --check-prefix=CHECK-LE %s
 
@@ -41,15 +42,22 @@
   ; CHECK-P9: cost of 2 {{.*}} icmp
 }
 
-define void @test4xi32(<4 x i32> %arg1, <4 x i32> %arg2, <4 x i32>* %arg3) {
+define void @test4xi32(i32 %arg1, <4 x i32>* %arg2) {
 
-  %v1 = load <4 x i32>, <4 x i32>* %arg3
-  store <4 x i32> %arg2, <4 x i32>* %arg3
+  %v1 = load <4 x i32>, <4 x i32>* %arg2
+  %v2 = insertelement <4 x i32> %v1, i32 %arg1, i32 2
+  store <4 x i32> %v2, <4 x i32>* %arg2
 
   ret void
   ; CHECK: cost of 1 {{.*}} load
+  ; CHECK: cost of 10 {{.*}} insertelement
   ; CHECK: cost of 1 {{.*}} store
+  ; CHECK-P8-LABEL: test4xi32
+  ; CHECK-P8: cost of 1 {{.*}} load
+  ; CHECK-P8: cost of 3 {{.*}} insertelement
+  ; CHECK-P8: cost of 1 {{.*}} store
   ; CHECK-P9: cost of 2 {{.*}} load
+  ; CHECK-P9: cost of 4 {{.*}} insertelement
   ; CHECK-P9: cost of 2 {{.*}} store
 }
 
@@ -66,3 +74,56 @@
   ; CHECK-LE: cost of 2 {{.*}} extractelement
   ; CHECK-LE: cost of 0 {{.*}} extractelement
 }
+
+define void @vexti32(<4 x i32> %p1) {
+  %i1 = extractelement <4 x i32> %p1, i32 0
+  %i2 = extractelement <4 x i32> %p1, i32 1
+  %i3 = extractelement <4 x i32> %p1, i32 2
+  %i4 = extractelement <4 x i32> %p1, i32 3
+  ret void
+  ; CHECK: cost of 3 {{.*}} extractelement
+  ; CHECK: cost of 3 {{.*}} extractelement
+  ; CHECK: cost of 3 {{.*}} extractelement
+  ; CHECK: cost of 3 {{.*}} extractelement
+  ; CHECK-P8-LABEL: vexti32
+  ; CHECK-P8: cost of 3 {{.*}} extractelement
+  ; CHECK-P8: cost of 3 {{.*}} extractelement
+  ; CHECK-P8: cost of 3 {{.*}} extractelement
+  ; CHECK-P8: cost of 3 {{.*}} extractelement
+  ; CHECK-P9: cost of 2 {{.*}} extractelement
+  ; CHECK-P9: cost of 1 {{.*}} extractelement
+  ; CHECK-P9: cost of 2 {{.*}} extractelement
+  ; CHECK-P9: cost of 2 {{.*}} extractelement
+  ; CHECK-LE: cost of 2 {{.*}} extractelement
+  ; CHECK-LE: cost of 2 {{.*}} extractelement
+  ; CHECK-LE: cost of 1 {{.*}} extractelement
+  ; CHECK-LE: cost of 2 {{.*}} extractelement
+}
+
+define void @vexti64(<2 x i64> %p1) {
+  %i1 = extractelement <2 x i64> %p1, i32 0
+  %i2 = extractelement <2 x i64> %p1, i32 1
+  ret void
+  ; CHECK: cost of 3 {{.*}} extractelement
+  ; CHECK: cost of 3 {{.*}} extractelement
+  ; CHECK-P8: cost of 3 {{.*}} extractelement
+  ; CHECK-P8: cost of 3 {{.*}} extractelement
+  ; CHECK-P9: cost of 1 {{.*}} extractelement
+  ; CHECK-P9: cost of 2 {{.*}} extractelement
+  ; CHECK-LE: cost of 2 {{.*}} extractelement
+  ; CHECK-LE: cost of 1 {{.*}} extractelement
+}
+
+define void @vext(<8 x i16> %p1, <16 x i8> %p2) {
+  %i1 = extractelement <8 x i16> %p1, i32 0
+  %i2 = extractelement <16 x i8> %p2, i32 0
+  ret void
+  ; CHECK: cost of 3 {{.*}} extractelement
+  ; CHECK: cost of 3 {{.*}} extractelement
+  ; CHECK-P8: cost of 3 {{.*}} extractelement
+  ; CHECK-P8: cost of 3 {{.*}} extractelement
+  ; CHECK-P9: cost of 2 {{.*}} extractelement
+  ; CHECK-P9: cost of 2 {{.*}} extractelement
+  ; CHECK-LE: cost of 2 {{.*}} extractelement
+  ; CHECK-LE: cost of 2 {{.*}} extractelement
+}