Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -10814,6 +10814,14 @@
   }
 
   MVT VecTy = N->getValueType(0).getSimpleVT();
+
+  // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
+  // aligned and the type is a vector with elements up to 4 bytes
+  if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16)
+      && VecTy.getScalarSizeInBits() <= 32 ) {
+    return SDValue();
+  }
+
   SDValue LoadOps[] = { Chain, Base };
   SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
                                          DAG.getVTList(MVT::v2f64, MVT::Other),
@@ -10878,6 +10886,13 @@
   SDValue Src = N->getOperand(SrcOpnd);
   MVT VecTy = Src.getValueType().getSimpleVT();
 
+  // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the load is
+  // aligned and the type is a vector with elements up to 4 bytes
+  if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16)
+      && VecTy.getScalarSizeInBits() <= 32 ) {
+    return SDValue();
+  }
+
   // All stores are done as v2f64 and possible bit cast.
   if (VecTy != MVT::v2f64) {
     Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
Index: lib/Target/PowerPC/PPCInstrVSX.td
===================================================================
--- lib/Target/PowerPC/PPCInstrVSX.td
+++ lib/Target/PowerPC/PPCInstrVSX.td
@@ -160,7 +160,7 @@
     def STXVW4X : XX1Form<31, 908,
                          (outs), (ins vsrc:$XT, memrr:$dst),
                          "stxvw4x $XT, $dst", IIC_LdStSTFD,
-                         [(store v4i32:$XT, xoaddr:$dst)]>;
+                         []>;
     }
   } // mayStore
 
@@ -1032,6 +1032,7 @@
   def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>;
   def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
   def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+  def : Pat<(store v4i32:$XT, xoaddr:$dst), (STXVW4X $XT, xoaddr:$dst)>;
 }
 
 // Permutes.
Index: test/CodeGen/PowerPC/ppc64-i128-abi.ll
===================================================================
--- test/CodeGen/PowerPC/ppc64-i128-abi.ll
+++ test/CodeGen/PowerPC/ppc64-i128-abi.ll
@@ -1,5 +1,6 @@
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
-; RUN:   -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-LE
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-LE \
+; RUN:   --implicit-check-not xxswapd
 
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-BE
@@ -8,13 +9,15 @@
 ; RUN:   -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX
 
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
-; RUN:   -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX
+; RUN:   -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX \
+; RUN:   --implicit-check-not xxswapd
 
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-BE-NOVSX
 
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
-; RUN:   -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-LE-NOVSX
+; RUN:   -mcpu=pwr8 -mattr=-vsx < %s | \
+; RUN:   FileCheck %s -check-prefix=CHECK-LE-NOVSX --implicit-check-not xxswapd
 
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr9 -ppc-vsr-nums-as-vr < %s | FileCheck %s \
@@ -26,7 +29,7 @@
 
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr9 -mattr=-power9-vector -mattr=-direct-move < %s | \
-; RUN:   FileCheck %s -check-prefix=CHECK-LE
+; RUN:   FileCheck %s -check-prefix=CHECK-LE --implicit-check-not xxswapd
 
 @x = common global <1 x i128> zeroinitializer, align 16
 @y = common global <1 x i128> zeroinitializer, align 16
@@ -199,8 +202,7 @@
        ret <1 x i128> %ret
 
 ; CHECK-LE-LABEL: @call_v1i128_increment_by_one
-; CHECK-LE: lxvd2x [[PARAM:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
-; CHECK-LE: xxswapd 34, [[PARAM]]
+; CHECK-LE: lvx 2, {{[0-9]+}}, {{[0-9]+}}
 ; CHECK-LE: bl v1i128_increment_by_one
 ; CHECK-LE: blr
 
@@ -229,10 +231,8 @@
        ret <1 x i128> %ret
 
 ; CHECK-LE-LABEL: @call_v1i128_increment_by_val
-; CHECK-LE: lxvd2x [[PARAM1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
-; CHECK-LE: lxvd2x [[PARAM2:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
-; CHECK-LE-DAG: xxswapd 34, [[PARAM1]]
-; CHECK-LE-DAG: xxswapd 35, [[PARAM2]]
+; CHECK-LE: lvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK-LE: lvx 3, {{[0-9]+}}, {{[0-9]+}}
 ; CHECK-LE: bl v1i128_increment_by_val
 ; CHECK-LE: blr
 
Index: test/CodeGen/PowerPC/swaps-le-1.ll
===================================================================
--- test/CodeGen/PowerPC/swaps-le-1.ll
+++ test/CodeGen/PowerPC/swaps-le-1.ll
@@ -13,6 +13,12 @@
 ; RUN:  -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s \
 ; RUN:  | FileCheck -check-prefix=NOOPTSWAP %s
 
+; LH: 2016-11-17
+;   Updated align attritue from 16 to 8 to keep swap instructions tests.
+;   Changes have been made on little-endian to use lvx and stvx
+;   instructions instead of lxvd2x/xxswapd and xxswapd/stxvd2x for
+;   aligned vectors with elements up to 4 bytes
+
 ; This test was generated from the following source:
 ;
 ; #define N 4096
@@ -29,10 +35,10 @@
 ;   }
 ; }
 
-@cb = common global [4096 x i32] zeroinitializer, align 16
-@cc = common global [4096 x i32] zeroinitializer, align 16
-@cd = common global [4096 x i32] zeroinitializer, align 16
-@ca = common global [4096 x i32] zeroinitializer, align 16
+@cb = common global [4096 x i32] zeroinitializer, align 8
+@cc = common global [4096 x i32] zeroinitializer, align 8
+@cd = common global [4096 x i32] zeroinitializer, align 8
+@ca = common global [4096 x i32] zeroinitializer, align 8
 
 define void @foo() {
 entry:
@@ -42,63 +48,63 @@
   %index = phi i64 [ 0, %entry ], [ %index.next.3, %vector.body ]
   %0 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cb, i64 0, i64 %index
   %1 = bitcast i32* %0 to <4 x i32>*
-  %wide.load = load <4 x i32>, <4 x i32>* %1, align 16
+  %wide.load = load <4 x i32>, <4 x i32>* %1, align 8
   %2 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cc, i64 0, i64 %index
   %3 = bitcast i32* %2 to <4 x i32>*
-  %wide.load13 = load <4 x i32>, <4 x i32>* %3, align 16
+  %wide.load13 = load <4 x i32>, <4 x i32>* %3, align 8
   %4 = add nsw <4 x i32> %wide.load13, %wide.load
   %5 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cd, i64 0, i64 %index
   %6 = bitcast i32* %5 to <4 x i32>*
-  %wide.load14 = load <4 x i32>, <4 x i32>* %6, align 16
+  %wide.load14 = load <4 x i32>, <4 x i32>* %6, align 8
   %7 = mul nsw <4 x i32> %4, %wide.load14
   %8 = getelementptr inbounds [4096 x i32], [4096 x i32]* @ca, i64 0, i64 %index
   %9 = bitcast i32* %8 to <4 x i32>*
-  store <4 x i32> %7, <4 x i32>* %9, align 16
+  store <4 x i32> %7, <4 x i32>* %9, align 8
   %index.next = add nuw nsw i64 %index, 4
   %10 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cb, i64 0, i64 %index.next
   %11 = bitcast i32* %10 to <4 x i32>*
-  %wide.load.1 = load <4 x i32>, <4 x i32>* %11, align 16
+  %wide.load.1 = load <4 x i32>, <4 x i32>* %11, align 8
   %12 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cc, i64 0, i64 %index.next
   %13 = bitcast i32* %12 to <4 x i32>*
-  %wide.load13.1 = load <4 x i32>, <4 x i32>* %13, align 16
+  %wide.load13.1 = load <4 x i32>, <4 x i32>* %13, align 8
   %14 = add nsw <4 x i32> %wide.load13.1, %wide.load.1
   %15 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cd, i64 0, i64 %index.next
   %16 = bitcast i32* %15 to <4 x i32>*
-  %wide.load14.1 = load <4 x i32>, <4 x i32>* %16, align 16
+  %wide.load14.1 = load <4 x i32>, <4 x i32>* %16, align 8
   %17 = mul nsw <4 x i32> %14, %wide.load14.1
   %18 = getelementptr inbounds [4096 x i32], [4096 x i32]* @ca, i64 0, i64 %index.next
   %19 = bitcast i32* %18 to <4 x i32>*
-  store <4 x i32> %17, <4 x i32>* %19, align 16
+  store <4 x i32> %17, <4 x i32>* %19, align 8
   %index.next.1 = add nuw nsw i64 %index.next, 4
   %20 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cb, i64 0, i64 %index.next.1
   %21 = bitcast i32* %20 to <4 x i32>*
-  %wide.load.2 = load <4 x i32>, <4 x i32>* %21, align 16
+  %wide.load.2 = load <4 x i32>, <4 x i32>* %21, align 8
   %22 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cc, i64 0, i64 %index.next.1
   %23 = bitcast i32* %22 to <4 x i32>*
-  %wide.load13.2 = load <4 x i32>, <4 x i32>* %23, align 16
+  %wide.load13.2 = load <4 x i32>, <4 x i32>* %23, align 8
   %24 = add nsw <4 x i32> %wide.load13.2, %wide.load.2
   %25 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cd, i64 0, i64 %index.next.1
   %26 = bitcast i32* %25 to <4 x i32>*
-  %wide.load14.2 = load <4 x i32>, <4 x i32>* %26, align 16
+  %wide.load14.2 = load <4 x i32>, <4 x i32>* %26, align 8
   %27 = mul nsw <4 x i32> %24, %wide.load14.2
   %28 = getelementptr inbounds [4096 x i32], [4096 x i32]* @ca, i64 0, i64 %index.next.1
   %29 = bitcast i32* %28 to <4 x i32>*
-  store <4 x i32> %27, <4 x i32>* %29, align 16
+  store <4 x i32> %27, <4 x i32>* %29, align 8
   %index.next.2 = add nuw nsw i64 %index.next.1, 4
   %30 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cb, i64 0, i64 %index.next.2
   %31 = bitcast i32* %30 to <4 x i32>*
-  %wide.load.3 = load <4 x i32>, <4 x i32>* %31, align 16
+  %wide.load.3 = load <4 x i32>, <4 x i32>* %31, align 8
   %32 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cc, i64 0, i64 %index.next.2
   %33 = bitcast i32* %32 to <4 x i32>*
-  %wide.load13.3 = load <4 x i32>, <4 x i32>* %33, align 16
+  %wide.load13.3 = load <4 x i32>, <4 x i32>* %33, align 8
   %34 = add nsw <4 x i32> %wide.load13.3, %wide.load.3
   %35 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cd, i64 0, i64 %index.next.2
   %36 = bitcast i32* %35 to <4 x i32>*
-  %wide.load14.3 = load <4 x i32>, <4 x i32>* %36, align 16
+  %wide.load14.3 = load <4 x i32>, <4 x i32>* %36, align 8
   %37 = mul nsw <4 x i32> %34, %wide.load14.3
   %38 = getelementptr inbounds [4096 x i32], [4096 x i32]* @ca, i64 0, i64 %index.next.2
   %39 = bitcast i32* %38 to <4 x i32>*
-  store <4 x i32> %37, <4 x i32>* %39, align 16
+  store <4 x i32> %37, <4 x i32>* %39, align 8
   %index.next.3 = add nuw nsw i64 %index.next.2, 4
   %40 = icmp eq i64 %index.next.3, 4096
   br i1 %40, label %for.end, label %vector.body
Index: test/CodeGen/PowerPC/swaps-le-2.ll
===================================================================
--- test/CodeGen/PowerPC/swaps-le-2.ll
+++ test/CodeGen/PowerPC/swaps-le-2.ll
@@ -2,6 +2,13 @@
 
 ; Test swap removal when a vector splat must be adjusted to make it legal.
 ;
+
+; LH: 2016-11-17
+;   Updated align attritue from 16 to 8 to keep swap instructions tests.
+;   Changes have been made on little-endian to use lvx and stvx
+;   instructions instead of lxvd2x/xxswapd and xxswapd/stxvd2x for
+;   aligned vectors with elements up to 4 bytes
+
 ; Test generated from following C code:
 ;
 ; vector char vc = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
@@ -28,37 +35,37 @@
 ;   vir = (vector int){vi[1], vi[1], vi[1], vi[1]};
 ; }
 
-@vc = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
-@vs = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
-@vi = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
-@vcr = common global <16 x i8> zeroinitializer, align 16
-@vsr = common global <8 x i16> zeroinitializer, align 16
-@vir = common global <4 x i32> zeroinitializer, align 16
+@vc = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 8
+@vs = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 8
+@vi = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 8
+@vcr = common global <16 x i8> zeroinitializer, align 8
+@vsr = common global <8 x i16> zeroinitializer, align 8
+@vir = common global <4 x i32> zeroinitializer, align 8
 
 ; Function Attrs: nounwind
 define void @cfoo() {
 entry:
-  %0 = load <16 x i8>, <16 x i8>* @vc, align 16
+  %0 = load <16 x i8>, <16 x i8>* @vc, align 8
   %vecinit30 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-  store <16 x i8> %vecinit30, <16 x i8>* @vcr, align 16
+  store <16 x i8> %vecinit30, <16 x i8>* @vcr, align 8
   ret void
 }
 
 ; Function Attrs: nounwind
 define void @sfoo() {
 entry:
-  %0 = load <8 x i16>, <8 x i16>* @vs, align 16
+  %0 = load <8 x i16>, <8 x i16>* @vs, align 8
   %vecinit14 = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
-  store <8 x i16> %vecinit14, <8 x i16>* @vsr, align 16
+  store <8 x i16> %vecinit14, <8 x i16>* @vsr, align 8
   ret void
 }
 
 ; Function Attrs: nounwind
 define void @ifoo() {
 entry:
-  %0 = load <4 x i32>, <4 x i32>* @vi, align 16
+  %0 = load <4 x i32>, <4 x i32>* @vi, align 8
   %vecinit6 = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  store <4 x i32> %vecinit6, <4 x i32>* @vir, align 16
+  store <4 x i32> %vecinit6, <4 x i32>* @vir, align 8
   ret void
 }
 
Index: test/CodeGen/PowerPC/vsx-ldst.ll
===================================================================
--- test/CodeGen/PowerPC/vsx-ldst.ll
+++ test/CodeGen/PowerPC/vsx-ldst.ll
@@ -14,8 +14,10 @@
 
 ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -O2 \
 ; RUN:   -mtriple=powerpc64le-unknown-linux-gnu < %s > %t
-; RUN: grep lxvd2x < %t | count 6
-; RUN: grep stxvd2x < %t | count 6
+; RUN: grep lxvd2x < %t | count 3
+; RUN: grep lvx < %t | count 3
+; RUN: grep stxvd2x < %t | count 3
+; RUN: grep stvx < %t | count 3
 
 ; RUN: llc -verify-machineinstrs -mcpu=pwr9 -O2 \
 ; RUN:   -mtriple=powerpc64le-unknown-linux-gnu < %s > %t
Index: test/CodeGen/PowerPC/vsx.ll
===================================================================
--- test/CodeGen/PowerPC/vsx.ll
+++ test/CodeGen/PowerPC/vsx.ll
@@ -1,8 +1,17 @@
-; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu -mattr=+vsx < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-REG %s
-; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu -mattr=+vsx -fast-isel -O0 < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu -mattr=+vsx -fast-isel -O0 < %s | FileCheck -check-prefix=CHECK-FISL %s
-; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-LE %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 \
+; RUN:   -mtriple=powerpc64-unknown-linux-gnu -mattr=+vsx < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 \
+; RUN:   -mtriple=powerpc64-unknown-linux-gnu -mattr=+vsx < %s | \
+; RUN:   FileCheck -check-prefix=CHECK-REG %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 \
+; RUN:   -mtriple=powerpc64-unknown-linux-gnu -mattr=+vsx -fast-isel -O0 < %s |\
+; RUN:   FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 \
+; RUN:   -mtriple=powerpc64-unknown-linux-gnu -mattr=+vsx -fast-isel -O0 < %s |\
+; RUN:   FileCheck -check-prefix=CHECK-FISL %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 \
+; RUN:   -mtriple=powerpc64le-unknown-linux-gnu -mattr=+vsx < %s | \
+; RUN:   FileCheck -check-prefix=CHECK-LE %s
 
 define double @test1(double %a, double %b) {
 entry:
@@ -645,8 +654,8 @@
 ; CHECK-FISL: blr
 
 ; CHECK-LE-LABEL: @test32
-; CHECK-LE: lxvd2x [[V1:[0-9]+]], 0, 3
-; CHECK-LE: xxswapd 34, [[V1]]
+; CHECK-LE: lvx 2, 0, 3
+; CHECK-LE-NOT: xxswapd
 ; CHECK-LE: blr
 }
 
@@ -663,8 +672,8 @@
 ; CHECK-FISL: blr
 
 ; CHECK-LE-LABEL: @test33
-; CHECK-LE: xxswapd [[V1:[0-9]+]], 34
-; CHECK-LE: stxvd2x [[V1]], 0, 3
+; CHECK-LE-NOT: xxswapd
+; CHECK-LE: stvx 2, 0, 3
 ; CHECK-LE: blr
 }
 
@@ -716,8 +725,8 @@
 ; CHECK-FISL: blr
 
 ; CHECK-LE-LABEL: @test34
-; CHECK-LE: lxvd2x [[V1:[0-9]+]], 0, 3
-; CHECK-LE: xxswapd 34, [[V1]]
+; CHECK-LE: lvx 2, 0, 3
+; CHECK-LE-NOT: xxswapd
 ; CHECK-LE: blr
 }
 
@@ -734,8 +743,8 @@
 ; CHECK-FISL: blr
 
 ; CHECK-LE-LABEL: @test35
-; CHECK-LE: xxswapd [[V1:[0-9]+]], 34
-; CHECK-LE: stxvd2x [[V1]], 0, 3
+; CHECK-LE-NOT: xxswapd
+; CHECK-LE: stvx 2, 0, 3
 ; CHECK-LE: blr
 }
 
@@ -1154,9 +1163,9 @@
 ; CHECK-LE-DAG: mtvsrd [[R1:[0-9]+]], 3
 ; CHECK-LE-DAG: xxswapd  [[V1:[0-9]+]], [[R1]]
 ; CHECK-LE-DAG: addi [[R2:[0-9]+]], {{[0-9]+}}, .LCPI
-; CHECK-LE-DAG: lxvd2x [[V2:[0-9]+]], 0, [[R2]]
+; CHECK-LE-DAG: lvx 3, 0, [[R2]]
 ; CHECK-LE-DAG: xxspltw 34, [[V1]]
-; CHECK-LE-DAG: xxswapd 35, [[V2]]
+; CHECK-LE-NOT: xxswapd 35, [[V2]]
 ; CHECK-LE: vadduwm 2, 2, 3
 ; CHECK-LE: blr
 }