Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -10566,6 +10566,20 @@
           (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
            LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
         return expandVSXLoadForLE(N, DCI);
+
+      // A v4i8 load otherwise degrades into poor code. Load the same four
+      // bytes as one i32 (keeping the original memory flags) and bitcast.
+      if (LoadVT == MVT::v4i8) {
+        SDValue ScalarLoad = DAG.getLoad(MVT::i32, dl, LD->getChain(),
+                                         LD->getBasePtr(), LD->getPointerInfo(),
+                                         LD->isVolatile(), LD->isNonTemporal(),
+                                         LD->isInvariant(), LD->getAlignment(),
+                                         LD->getAAInfo());
+        SDValue BitCast = DAG.getBitcast(MVT::v4i8, ScalarLoad);
+        return DAG.getNode(ISD::MERGE_VALUES, dl,
+                           DAG.getVTList(MVT::v4i8, MVT::Other),
+                           BitCast, ScalarLoad.getValue(1));
+      }
     }
 
     // We sometimes end up with a 64-bit integer load, from which we extract
Index: test/CodeGen/PowerPC/load-v4i8-improved.ll
===================================================================
--- test/CodeGen/PowerPC/load-v4i8-improved.ll
+++ test/CodeGen/PowerPC/load-v4i8-improved.ll
@@ -0,0 +1,23 @@
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s \
+; RUN:   --check-prefix=CHECK-BE
+
+; Splatting a loaded <4 x i8> should use a scalar lwz and xxspltw, not vmrg/vperm.
+define <16 x i8> @test(i32* %s, i32* %t) {
+  %vecptr = bitcast i32* %s to <4 x i8>*
+  %bytes = load <4 x i8>, <4 x i8>* %vecptr, align 4
+  %splat = shufflevector <4 x i8> %bytes, <4 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  ret <16 x i8> %splat
+; CHECK: lwz [[WORD:[0-9]+]], 0(3)
+; CHECK: mtvsrd [[MOVED:[0-9]+]], [[WORD]]
+; CHECK: xxswapd  [[SWAPPED:[0-9]+]], [[MOVED]]
+; CHECK: xxspltw 34, [[SWAPPED]], 3
+; CHECK-NOT: vmrg
+; CHECK-NOT: vperm
+; CHECK-BE: lwz [[WORD:[0-9]+]], 0(3)
+; CHECK-BE: sldi [[SHIFTED:[0-9]+]], [[WORD]], 32
+; CHECK-BE: mtvsrd [[MOVED:[0-9]+]], [[SHIFTED]]
+; CHECK-BE: xxspltw 34, [[MOVED]], 0
+; CHECK-BE-NOT: vmrg
+; CHECK-BE-NOT: vperm
+}