Index: llvm/lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.h +++ llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -953,6 +953,8 @@ SDValue LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG, const SDLoc &dl) const; + SDValue LowerTRUNCATEVector(SDValue Op, SelectionDAG &DAG) const; + SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const; SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const; Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -119,6 +119,8 @@ static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int); +static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl); + // FIXME: Remove this once the bug has been fixed! extern cl::opt<bool> ANDIGlueBug; @@ -640,6 +642,12 @@ // with merges, splats, etc. setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom); + setOperationAction(ISD::AND , MVT::v4i32, Legal); setOperationAction(ISD::OR , MVT::v4i32, Legal); setOperationAction(ISD::XOR , MVT::v4i32, Legal); @@ -6890,6 +6898,60 @@ Op.getOperand(0)); } +SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op, + SelectionDAG &DAG) const { + + // Implements a vector truncate that fits in a vector register as a shuffle. + // We want to legalize vector truncates down to where the source fits in + // a vector register (and target is therefore smaller than vector register + // size). 
At that point legalization will try to custom lower the sub-legal + // result and get here - where we can contain the truncate as a single target + // operation. + + // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows: + // [MSB1 LSB1][MSB2 LSB2] to [LSB1][LSB2] + // + // We will implement it for big-endian ordering as this (where x denotes + // undefined): + // [MSB1 LSB1][MSB2 LSB2][x x][x x][x x][x x][x x][x x] to + // [LSB1][LSB2][x][x][x][x][x][x][x][x][x][x][x][x][x][x] + // + // The same operation in little-endian ordering will be: + // [LSB1 MSB1][LSB2 MSB2][x x][x x][x x][x x][x x][x x] to + // [LSB1][LSB2][x][x][x][x][x][x][x][x][x][x][x][x][x][x] + + assert(Op.getValueType().isVector() && "Vector type expected."); + + SDLoc DL(Op); + SDValue N1 = Op.getOperand(0); + unsigned SrcSize = N1.getValueType().getSizeInBits(); + assert(SrcSize <= 128 && "Source must be no wider than a legal PPC vector"); + SDValue WideSrc = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL); + + EVT TrgVT = Op.getValueType(); + unsigned TrgNumElts = TrgVT.getVectorNumElements(); + EVT EltVT = TrgVT.getVectorElementType(); + unsigned WideNumElts = 128 / EltVT.getSizeInBits(); + EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts); + + // First list the elements we want to keep. + unsigned SizeMult = SrcSize / TrgVT.getSizeInBits(); + SmallVector<int, 16> ShuffV; + if (Subtarget.isLittleEndian()) + for (unsigned i = 0; i < TrgNumElts; ++i) + ShuffV.push_back(i * SizeMult); + else + for (unsigned i = 1; i <= TrgNumElts; ++i) + ShuffV.push_back(i * SizeMult - 1); + + // Populate the remaining elements with undefs. + for (unsigned i = TrgNumElts; i < WideNumElts; ++i) + ShuffV.push_back(i + WideNumElts); + + SDValue Conv = DAG.getNode(ISD::BITCAST, DL, WideVT, WideSrc); + return DAG.getVectorShuffle(WideVT, DL, Conv, DAG.getUNDEF(WideVT), ShuffV); +} + /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when /// possible. 
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { @@ -9737,6 +9799,14 @@ return; Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl)); return; + case ISD::TRUNCATE: { + EVT TrgVT = N->getValueType(0); + if (TrgVT.isVector() && + isOperationCustom(N->getOpcode(), TrgVT) && + N->getOperand(0).getValueType().getSizeInBits() <= 128) + Results.push_back(LowerTRUNCATEVector(SDValue(N, 0), DAG)); + return; + } case ISD::BITCAST: // Don't handle bitcast here. return; Index: llvm/test/CodeGen/PowerPC/vec-trunc.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/vec-trunc.ll @@ -0,0 +1,90 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-BE + +define void @test8i8(<8 x i8>* nocapture %Sink, <8 x i16>* nocapture readonly %SrcPtr) { +entry: + %0 = load <8 x i16>, <8 x i16>* %SrcPtr, align 16 + %1 = trunc <8 x i16> %0 to <8 x i8> + store <8 x i8> %1, <8 x i8>* %Sink, align 16 + ret void +} +; CHECK-LABEL: @test8i8 +; CHECK: vpkuhum +; CHECK-BE-LABEL: @test8i8 +; CHECK-BE: vpkuhum + +define void @test4i8(<4 x i8>* nocapture %Sink, <4 x i16>* nocapture readonly %SrcPtr) { +entry: + %0 = load <4 x i16>, <4 x i16>* %SrcPtr, align 16 + %1 = trunc <4 x i16> %0 to <4 x i8> + store <4 x i8> %1, <4 x i8>* %Sink, align 16 + ret void +} +; CHECK-LABEL: @test4i8 +; CHECK: vpkuhum +; CHECK-BE-LABEL: @test4i8 +; CHECK-BE: vpkuhum + +define void @test4i8w(<4 x i8>* nocapture %Sink, <4 x i32>* nocapture readonly %SrcPtr) { +entry: + %0 = load <4 x i32>, <4 x i32>* %SrcPtr, align 16 + %1 = trunc <4 x i32> %0 to <4 x i8> + store <4 x i8> %1, <4 x i8>* %Sink, align 16 + ret void +} +; CHECK-LABEL: 
@test4i8w +; CHECK: vperm +; CHECK-BE-LABEL: @test4i8w +; CHECK-BE: vperm + +define void @test2i8(<2 x i8>* nocapture %Sink, <2 x i16>* nocapture readonly %SrcPtr) { +entry: + %0 = load <2 x i16>, <2 x i16>* %SrcPtr, align 16 + %1 = trunc <2 x i16> %0 to <2 x i8> + store <2 x i8> %1, <2 x i8>* %Sink, align 16 + ret void +} +; CHECK-LABEL: @test2i8 +; CHECK: vpkuhum +; CHECK-BE-LABEL: @test2i8 +; CHECK-BE: vpkuhum + +define void @test4i16(<4 x i16>* nocapture %Sink, <4 x i32>* nocapture readonly %SrcPtr) { +entry: + %0 = load <4 x i32>, <4 x i32>* %SrcPtr, align 16 + %1 = trunc <4 x i32> %0 to <4 x i16> + store <4 x i16> %1, <4 x i16>* %Sink, align 16 + ret void +} +; CHECK-LABEL: @test4i16 +; CHECK: vpkuwum +; CHECK-BE-LABEL: @test4i16 +; CHECK-BE: vpkuwum + +define void @test2i16(<2 x i16>* nocapture %Sink, <2 x i32>* nocapture readonly %SrcPtr) { +entry: + %0 = load <2 x i32>, <2 x i32>* %SrcPtr, align 16 + %1 = trunc <2 x i32> %0 to <2 x i16> + store <2 x i16> %1, <2 x i16>* %Sink, align 16 + ret void +} +; CHECK-LABEL: @test2i16 +; CHECK: vpkuwum +; CHECK-BE-LABEL: @test2i16 +; CHECK-BE: vpkuwum + +define void @test2i16d(<2 x i16>* nocapture %Sink, <2 x i64>* nocapture readonly %SrcPtr) { +entry: + %0 = load <2 x i64>, <2 x i64>* %SrcPtr, align 16 + %1 = trunc <2 x i64> %0 to <2 x i16> + store <2 x i16> %1, <2 x i16>* %Sink, align 16 + ret void +} +; CHECK-LABEL: @test2i16d +; CHECK: vperm +; CHECK-BE-LABEL: @test2i16d +; CHECK-BE: vperm