diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -14794,16 +14794,18 @@
 // On little endian, that's just the corresponding element in the other
 // half of the vector. On big endian, it is in the same half but right
 // justified rather than left justified in that half.
-static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
-                                            int LHSMaxIdx, int RHSMinIdx,
-                                            int RHSMaxIdx, int HalfVec,
-                                            unsigned ValidLaneWidth,
-                                            const PPCSubtarget &Subtarget) {
+static void fixupShuffleMaskForPermutedSToV(
+    SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
+    int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSValidLaneWidth,
+    unsigned RHSValidLaneWidth, const PPCSubtarget &Subtarget) {
   for (int i = 0, e = ShuffV.size(); i < e; i++) {
     int Idx = ShuffV[i];
-    if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
+    if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
       ShuffV[i] +=
-          Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
+          Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSValidLaneWidth;
+    if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
+      ShuffV[i] +=
+          Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSValidLaneWidth;
   }
 }
 
@@ -14889,36 +14891,31 @@
   SDValue SToVLHS = isScalarToVec(LHS);
   SDValue SToVRHS = isScalarToVec(RHS);
   if (SToVLHS || SToVRHS) {
-    // FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
-    // same type and have differing element sizes, then do not perform
-    // the following transformation. The current transformation for
-    // SCALAR_TO_VECTOR assumes that both input vectors have the same
-    // element size. This will be updated in the future to account for
-    // differing sizes of the LHS and RHS.
-    if (SToVLHS && SToVRHS &&
-        (SToVLHS.getValueType().getScalarSizeInBits() !=
-         SToVRHS.getValueType().getScalarSizeInBits()))
-      return Res;
-
-    int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
-                            : SToVRHS.getValueType().getVectorNumElements();
-    int NumEltsOut = ShuffV.size();
+    int ShuffleEltWidth =
+        SVN->getValueType(0).getVectorElementType().getSizeInBits();
+    int ShuffleNumElts = ShuffV.size();
+    int HalfVec = ShuffleNumElts / 2;
     // The width of the "valid lane" (i.e. the lane that contains the value that
     // is vectorized) needs to be expressed in terms of the number of elements
     // of the shuffle. It is thereby the ratio of the values before and after
-    // any bitcast.
-    unsigned ValidLaneWidth =
-        SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
-                      LHS.getValueType().getScalarSizeInBits()
-                : SToVRHS.getValueType().getScalarSizeInBits() /
-                      RHS.getValueType().getScalarSizeInBits();
+    // any bitcast. It defaults to HalfVec here and is recomputed below for
+    // whichever of LHS/RHS is a SCALAR_TO_VECTOR node.
+    unsigned LHSValidLaneWidth = HalfVec;
+    unsigned RHSValidLaneWidth = HalfVec;
 
     // Initially assume that neither input is permuted. These will be adjusted
     // accordingly if either input is.
-    int LHSMaxIdx = -1;
-    int RHSMinIdx = -1;
-    int RHSMaxIdx = -1;
-    int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
+    int LHSFirstElt = 0;
+    int RHSFirstElt = ShuffleNumElts;
+    int LHSLastElt = -1;
+    int RHSLastElt = -1;
+
+    // The scalar size of the LHS and RHS is initially assumed to be 128 bits,
+    // i.e. the full width of the vector rather than that of a single element.
+    // If the LHS or RHS are SCALAR_TO_VECTOR nodes, we will adjust these values
+    // to the size of the scalar input to the SCALAR_TO_VECTOR later on.
+    int LHSScalarSize = 128;
+    int RHSScalarSize = 128;
 
     // Get the permuted scalar to vector nodes for the source(s) that come from
     // ISD::SCALAR_TO_VECTOR.
@@ -14930,7 +14927,10 @@
       if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
         return Res;
       // Set up the values for the shuffle vector fixup.
-      LHSMaxIdx = NumEltsOut / NumEltsIn;
+      LHSValidLaneWidth = SToVLHS.getValueType().getScalarSizeInBits() /
+                          LHS.getValueType().getScalarSizeInBits();
+      LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
+      LHSLastElt = LHSScalarSize / (ShuffleEltWidth + 1);
       SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
       if (SToVLHS.getValueType() != LHS.getValueType())
         SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
@@ -14939,8 +14939,10 @@
     if (SToVRHS) {
       if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
         return Res;
-      RHSMinIdx = NumEltsOut;
-      RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
+      RHSValidLaneWidth = SToVRHS.getValueType().getScalarSizeInBits() /
+                          RHS.getValueType().getScalarSizeInBits();
+      RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
+      RHSLastElt = RHSScalarSize / (ShuffleEltWidth + 1) + RHSFirstElt;
       SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
       if (SToVRHS.getValueType() != RHS.getValueType())
         SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
@@ -14951,9 +14953,10 @@
     // The minimum and maximum indices that correspond to element zero for both
     // the LHS and RHS are computed and will control which shuffle mask entries
     // are to be changed. For example, if the RHS is permuted, any shuffle mask
-    // entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
-    fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
-                                    HalfVec, ValidLaneWidth, Subtarget);
+    // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
+    fixupShuffleMaskForPermutedSToV(
+        ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
+        LHSValidLaneWidth, RHSValidLaneWidth, Subtarget);
     Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
 
     // We may have simplified away the shuffle. We won't be able to do anything
diff --git a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
--- a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
+++ b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
@@ -2567,11 +2567,9 @@
 ;
 ; CHECK-LE-LABEL: buildi2:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    mtfprd f0, r4
+; CHECK-LE-NEXT:    mtfprwz f0, r4
 ; CHECK-LE-NEXT:    mtfprd f1, r3
-; CHECK-LE-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-NEXT:    xxswapd v2, vs1
-; CHECK-LE-NEXT:    xxmrgld v2, v2, vs0
+; CHECK-LE-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-NEXT:    blr
 ;
 ; CHECK-AIX-LABEL: buildi2:
diff --git a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
--- a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
@@ -267,56 +267,54 @@
 define <16 x i8> @test_v16i8_v8i16(i16 %arg, i8 %arg1) {
 ; CHECK-LE-P8-LABEL: test_v16i8_v8i16:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    mtfprd f0, r4
-; CHECK-LE-P8-NEXT:    mtfprd f1, r3
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
-; CHECK-LE-P8-NEXT:    vmrglh v2, v3, v2
+; CHECK-LE-P8-NEXT:    mtvsrd v2, r4
+; CHECK-LE-P8-NEXT:    mtvsrd v3, r3
+; CHECK-LE-P8-NEXT:    vmrghh v2, v3, v2
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v16i8_v8i16:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    mtfprd f0, r4
-; CHECK-LE-P9-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P9-NEXT:    mtfprd f0, r3
-; CHECK-LE-P9-NEXT:    xxswapd v3, vs0
-; CHECK-LE-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-LE-P9-NEXT:    mtvsrd v2, r4
+; CHECK-LE-P9-NEXT:    mtvsrd v3, r3
+; CHECK-LE-P9-NEXT:    vmrghh v2, v3, v2
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v16i8_v8i16:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    sldi r4, r4, 56
-; CHECK-BE-P8-NEXT:    sldi r3, r3, 48
-; CHECK-BE-P8-NEXT:    mtvsrd v2, r4
-; CHECK-BE-P8-NEXT:    mtvsrd v3, r3
-; CHECK-BE-P8-NEXT:    vmrghh v2, v2, v3
+; CHECK-BE-P8-NEXT:    addis r5, r2, .LCPI3_0@toc@ha
+; CHECK-BE-P8-NEXT:    mtvsrwz v2, r4
+; CHECK-BE-P8-NEXT:    addi r5, r5, .LCPI3_0@toc@l
+; CHECK-BE-P8-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r5
+; CHECK-BE-P8-NEXT:    vperm v2, v2, v4, v3
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v16i8_v8i16:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    sldi r4, r4, 56
-; CHECK-BE-P9-NEXT:    sldi r3, r3, 48
-; CHECK-BE-P9-NEXT:    mtvsrd v2, r4
-; CHECK-BE-P9-NEXT:    mtvsrd v3, r3
-; CHECK-BE-P9-NEXT:    vmrghh v2, v2, v3
+; CHECK-BE-P9-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
+; CHECK-BE-P9-NEXT:    mtvsrwz v2, r4
+; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI3_0@toc@l
+; CHECK-BE-P9-NEXT:    lxv v4, 0(r3)
+; CHECK-BE-P9-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v16i8_v8i16:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    sldi r4, r4, 56
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 48
-; CHECK-AIX-64-P8-NEXT:    mtvsrd v2, r4
-; CHECK-AIX-64-P8-NEXT:    mtvsrd v3, r3
-; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v2, v3
+; CHECK-AIX-64-P8-NEXT:    ld r5, L..C3(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v2, r4
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r3
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r5
+; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v4, v3
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v16i8_v8i16:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    sldi r4, r4, 56
-; CHECK-AIX-64-P9-NEXT:    sldi r3, r3, 48
-; CHECK-AIX-64-P9-NEXT:    mtvsrd v2, r4
-; CHECK-AIX-64-P9-NEXT:    mtvsrd v3, r3
-; CHECK-AIX-64-P9-NEXT:    vmrghh v2, v2, v3
+; CHECK-AIX-64-P9-NEXT:    mtvsrwz v3, r3
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C2(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    mtvsrwz v2, r4
+; CHECK-AIX-64-P9-NEXT:    lxv v4, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v16i8_v8i16:
@@ -349,56 +347,54 @@
 define <16 x i8> @test_v8i16_v16i8(i16 %arg, i8 %arg1) {
 ; CHECK-LE-P8-LABEL: test_v8i16_v16i8:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    mtfprd f0, r4
-; CHECK-LE-P8-NEXT:    mtfprd f1, r3
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
-; CHECK-LE-P8-NEXT:    vmrglh v2, v2, v3
+; CHECK-LE-P8-NEXT:    mtvsrd v2, r4
+; CHECK-LE-P8-NEXT:    mtvsrd v3, r3
+; CHECK-LE-P8-NEXT:    vmrghh v2, v2, v3
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v8i16_v16i8:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    mtfprd f0, r4
-; CHECK-LE-P9-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P9-NEXT:    mtfprd f0, r3
-; CHECK-LE-P9-NEXT:    xxswapd v3, vs0
-; CHECK-LE-P9-NEXT:    vmrglh v2, v2, v3
+; CHECK-LE-P9-NEXT:    mtvsrd v2, r4
+; CHECK-LE-P9-NEXT:    mtvsrd v3, r3
+; CHECK-LE-P9-NEXT:    vmrghh v2, v2, v3
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v8i16_v16i8:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    sldi r4, r4, 56
-; CHECK-BE-P8-NEXT:    sldi r3, r3, 48
-; CHECK-BE-P8-NEXT:    mtvsrd v2, r4
-; CHECK-BE-P8-NEXT:    mtvsrd v3, r3
-; CHECK-BE-P8-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-P8-NEXT:    addis r5, r2, .LCPI4_0@toc@ha
+; CHECK-BE-P8-NEXT:    mtvsrwz v2, r4
+; CHECK-BE-P8-NEXT:    addi r5, r5, .LCPI4_0@toc@l
+; CHECK-BE-P8-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r5
+; CHECK-BE-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v8i16_v16i8:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    sldi r4, r4, 56
-; CHECK-BE-P9-NEXT:    sldi r3, r3, 48
-; CHECK-BE-P9-NEXT:    mtvsrd v2, r4
-; CHECK-BE-P9-NEXT:    mtvsrd v3, r3
-; CHECK-BE-P9-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-P9-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
+; CHECK-BE-P9-NEXT:    mtvsrwz v2, r4
+; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI4_0@toc@l
+; CHECK-BE-P9-NEXT:    lxv v4, 0(r3)
+; CHECK-BE-P9-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v8i16_v16i8:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    sldi r4, r4, 56
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 48
-; CHECK-AIX-64-P8-NEXT:    mtvsrd v2, r4
-; CHECK-AIX-64-P8-NEXT:    mtvsrd v3, r3
-; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v3, v2
+; CHECK-AIX-64-P8-NEXT:    ld r5, L..C4(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v2, r4
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r3
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r5
+; CHECK-AIX-64-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v8i16_v16i8:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    sldi r4, r4, 56
-; CHECK-AIX-64-P9-NEXT:    sldi r3, r3, 48
-; CHECK-AIX-64-P9-NEXT:    mtvsrd v2, r4
-; CHECK-AIX-64-P9-NEXT:    mtvsrd v3, r3
-; CHECK-AIX-64-P9-NEXT:    vmrghh v2, v3, v2
+; CHECK-AIX-64-P9-NEXT:    mtvsrwz v3, r3
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C3(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    mtvsrwz v2, r4
+; CHECK-AIX-64-P9-NEXT:    lxv v4, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_v16i8:
@@ -473,7 +469,7 @@
 ;
 ; CHECK-AIX-64-P8-LABEL: test_none_v8i16:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    ld r5, L..C3(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    ld r5, L..C5(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v2, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r3
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r5
@@ -483,7 +479,7 @@
 ; CHECK-AIX-64-P9-LABEL: test_none_v8i16:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    mtvsrwz v3, r3
-; CHECK-AIX-64-P9-NEXT:    ld r3, L..C2(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C4(r2) # %const.0
 ; CHECK-AIX-64-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-AIX-64-P9-NEXT:    lxv v4, 0(r3)
 ; CHECK-AIX-64-P9-NEXT:    vperm v2, v2, v3, v4
@@ -547,7 +543,7 @@
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v8i16_none:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    ld r4, L..C4(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    ld r4, L..C6(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r3
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    vperm v2, v4, v2, v3
@@ -582,53 +578,54 @@
 define <16 x i8> @test_v16i8_v4i32(i8 %arg, i32 %arg1, <16 x i8> %a, <4 x i32> %b) {
 ; CHECK-LE-P8-LABEL: test_v16i8_v4i32:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    mtfprd f1, r4
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
-; CHECK-LE-P8-NEXT:    vmrglb v2, v3, v2
+; CHECK-LE-P8-NEXT:    mtvsrd v2, r3
+; CHECK-LE-P8-NEXT:    mtvsrwz v3, r4
+; CHECK-LE-P8-NEXT:    vmrghb v2, v3, v2
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v16i8_v4i32:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    mtfprd f0, r3
-; CHECK-LE-P9-NEXT:    mtvsrws v3, r4
-; CHECK-LE-P9-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P9-NEXT:    vmrglb v2, v3, v2
+; CHECK-LE-P9-NEXT:    mtvsrd v2, r3
+; CHECK-LE-P9-NEXT:    mtvsrwz v3, r4
+; CHECK-LE-P9-NEXT:    vmrghb v2, v3, v2
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v16i8_v4i32:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    sldi r3, r3, 56
-; CHECK-BE-P8-NEXT:    sldi r4, r4, 32
-; CHECK-BE-P8-NEXT:    mtvsrd v2, r3
-; CHECK-BE-P8-NEXT:    mtvsrd v3, r4
-; CHECK-BE-P8-NEXT:    vmrghb v2, v2, v3
+; CHECK-BE-P8-NEXT:    addis r5, r2, .LCPI7_0@toc@ha
+; CHECK-BE-P8-NEXT:    mtvsrwz v2, r3
+; CHECK-BE-P8-NEXT:    addi r5, r5, .LCPI7_0@toc@l
+; CHECK-BE-P8-NEXT:    mtvsrwz v4, r4
+; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r5
+; CHECK-BE-P8-NEXT:    vperm v2, v2, v4, v3
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v16i8_v4i32:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    sldi r3, r3, 56
-; CHECK-BE-P9-NEXT:    mtvsrws v3, r4
-; CHECK-BE-P9-NEXT:    mtvsrd v2, r3
-; CHECK-BE-P9-NEXT:    vmrghb v2, v2, v3
+; CHECK-BE-P9-NEXT:    mtvsrwz v2, r3
+; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI7_0@toc@ha
+; CHECK-BE-P9-NEXT:    mtvsrwz v3, r4
+; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI7_0@toc@l
+; CHECK-BE-P9-NEXT:    lxv v4, 0(r3)
+; CHECK-BE-P9-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v16i8_v4i32:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 56
-; CHECK-AIX-64-P8-NEXT:    sldi r4, r4, 32
-; CHECK-AIX-64-P8-NEXT:    mtvsrd v2, r3
-; CHECK-AIX-64-P8-NEXT:    mtvsrd v3, r4
-; CHECK-AIX-64-P8-NEXT:    vmrghb v2, v2, v3
+; CHECK-AIX-64-P8-NEXT:    ld r5, L..C7(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v2, r3
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r4
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r5
+; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v4, v3
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v16i8_v4i32:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    sldi r3, r3, 56
-; CHECK-AIX-64-P9-NEXT:    mtvsrws v3, r4
-; CHECK-AIX-64-P9-NEXT:    mtvsrd v2, r3
-; CHECK-AIX-64-P9-NEXT:    vmrghb v2, v2, v3
+; CHECK-AIX-64-P9-NEXT:    mtvsrwz v2, r3
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C5(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    mtvsrwz v3, r4
+; CHECK-AIX-64-P9-NEXT:    lxv v4, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v16i8_v4i32:
@@ -662,53 +659,54 @@
 define <16 x i8> @test_v4i32_v16i8(i32 %arg, i8 %arg1) {
 ; CHECK-LE-P8-LABEL: test_v4i32_v16i8:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    mtfprd f0, r4
-; CHECK-LE-P8-NEXT:    mtfprd f1, r3
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
-; CHECK-LE-P8-NEXT:    vmrglh v2, v2, v3
+; CHECK-LE-P8-NEXT:    mtvsrd v2, r4
+; CHECK-LE-P8-NEXT:    mtvsrwz v3, r3
+; CHECK-LE-P8-NEXT:    vmrghh v2, v2, v3
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v4i32_v16i8:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    mtfprd f0, r4
-; CHECK-LE-P9-NEXT:    mtvsrws v3, r3
-; CHECK-LE-P9-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P9-NEXT:    vmrglh v2, v2, v3
+; CHECK-LE-P9-NEXT:    mtvsrd v2, r4
+; CHECK-LE-P9-NEXT:    mtvsrwz v3, r3
+; CHECK-LE-P9-NEXT:    vmrghh v2, v2, v3
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v4i32_v16i8:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    sldi r4, r4, 56
-; CHECK-BE-P8-NEXT:    sldi r3, r3, 32
-; CHECK-BE-P8-NEXT:    mtvsrd v2, r4
-; CHECK-BE-P8-NEXT:    mtvsrd v3, r3
-; CHECK-BE-P8-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-P8-NEXT:    addis r5, r2, .LCPI8_0@toc@ha
+; CHECK-BE-P8-NEXT:    mtvsrwz v2, r4
+; CHECK-BE-P8-NEXT:    addi r5, r5, .LCPI8_0@toc@l
+; CHECK-BE-P8-NEXT:    mtvsrwz v4, r3
+; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r5
+; CHECK-BE-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v4i32_v16i8:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    sldi r4, r4, 56
-; CHECK-BE-P9-NEXT:    mtvsrws v3, r3
-; CHECK-BE-P9-NEXT:    mtvsrd v2, r4
-; CHECK-BE-P9-NEXT:    vmrghh v2, v3, v2
+; CHECK-BE-P9-NEXT:    mtvsrwz v3, r3
+; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI8_0@toc@ha
+; CHECK-BE-P9-NEXT:    mtvsrwz v2, r4
+; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI8_0@toc@l
+; CHECK-BE-P9-NEXT:    lxv v4, 0(r3)
+; CHECK-BE-P9-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_v16i8:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    sldi r4, r4, 56
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 32
-; CHECK-AIX-64-P8-NEXT:    mtvsrd v2, r4
-; CHECK-AIX-64-P8-NEXT:    mtvsrd v3, r3
-; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v3, v2
+; CHECK-AIX-64-P8-NEXT:    ld r5, L..C8(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v2, r4
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r3
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r5
+; CHECK-AIX-64-P8-NEXT:    vperm v2, v4, v2, v3
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v4i32_v16i8:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    sldi r4, r4, 56
-; CHECK-AIX-64-P9-NEXT:    mtvsrws v3, r3
-; CHECK-AIX-64-P9-NEXT:    mtvsrd v2, r4
-; CHECK-AIX-64-P9-NEXT:    vmrghh v2, v3, v2
+; CHECK-AIX-64-P9-NEXT:    mtvsrwz v3, r3
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C6(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    mtvsrwz v2, r4
+; CHECK-AIX-64-P9-NEXT:    lxv v4, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v16i8:
@@ -783,9 +781,9 @@
 ;
 ; CHECK-AIX-64-P8-LABEL: test_none_v4i32:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    ld r4, L..C5(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    ld r4, L..C9(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r3
-; CHECK-AIX-64-P8-NEXT:    ld r3, L..C6(r2) # %const.1
+; CHECK-AIX-64-P8-NEXT:    ld r3, L..C10(r2) # %const.1
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v4, v3
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r3
@@ -876,7 +874,7 @@
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_none:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    lbzx r4, 0, r4
-; CHECK-AIX-64-P8-NEXT:    ld r5, L..C7(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    ld r5, L..C11(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    lxsiwzx v3, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    mtvsrwz v2, r4
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r5
@@ -887,7 +885,7 @@
 ; CHECK-AIX-64-P9-LABEL: test_v4i32_none:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    lxsiwzx v2, 0, r3
-; CHECK-AIX-64-P9-NEXT:    ld r3, L..C3(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C7(r2) # %const.0
 ; CHECK-AIX-64-P9-NEXT:    lxsibzx v3, 0, r4
 ; CHECK-AIX-64-P9-NEXT:    lxv v4, 0(r3)
 ; CHECK-AIX-64-P9-NEXT:    vspltb v3, v3, 7
@@ -930,20 +928,16 @@
 define <16 x i8> @test_v16i8_v2i64(i8 %arg, i64 %arg1, <16 x i8> %a, <2 x i64> %b) {
 ; CHECK-LE-P8-LABEL: test_v16i8_v2i64:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    mtfprd f1, r4
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
-; CHECK-LE-P8-NEXT:    vmrglb v2, v3, v2
+; CHECK-LE-P8-NEXT:    mtvsrd v2, r3
+; CHECK-LE-P8-NEXT:    mtvsrd v3, r4
+; CHECK-LE-P8-NEXT:    vmrghb v2, v3, v2
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v16i8_v2i64:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    mtfprd f0, r3
-; CHECK-LE-P9-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P9-NEXT:    mtfprd f0, r4
-; CHECK-LE-P9-NEXT:    xxswapd v3, vs0
-; CHECK-LE-P9-NEXT:    vmrglb v2, v3, v2
+; CHECK-LE-P9-NEXT:    mtvsrd v2, r3
+; CHECK-LE-P9-NEXT:    mtvsrd v3, r4
+; CHECK-LE-P9-NEXT:    vmrghb v2, v3, v2
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v16i8_v2i64:
@@ -1009,20 +1003,16 @@
 define <16 x i8> @test_v2i64_v16i8(i64 %arg, i8 %arg1) {
 ; CHECK-LE-P8-LABEL: test_v2i64_v16i8:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    mtfprd f0, r4
-; CHECK-LE-P8-NEXT:    mtfprd f1, r3
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
-; CHECK-LE-P8-NEXT:    vmrglh v2, v2, v3
+; CHECK-LE-P8-NEXT:    mtvsrd v2, r4
+; CHECK-LE-P8-NEXT:    mtvsrd v3, r3
+; CHECK-LE-P8-NEXT:    vmrghh v2, v2, v3
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v2i64_v16i8:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    mtfprd f0, r4
-; CHECK-LE-P9-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P9-NEXT:    mtfprd f0, r3
-; CHECK-LE-P9-NEXT:    xxswapd v3, vs0
-; CHECK-LE-P9-NEXT:    vmrglh v2, v2, v3
+; CHECK-LE-P9-NEXT:    mtvsrd v2, r4
+; CHECK-LE-P9-NEXT:    mtvsrd v3, r3
+; CHECK-LE-P9-NEXT:    vmrghh v2, v2, v3
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v2i64_v16i8:
@@ -1394,7 +1384,7 @@
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v8i16_v8i16rhs:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    ld r5, L..C8(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    ld r5, L..C12(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    mtvsrwz v2, r4
 ; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r3
 ; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r5
@@ -1403,7 +1393,7 @@
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v8i16_v8i16rhs:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    ld r5, L..C4(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    ld r5, L..C8(r2) # %const.0
 ; CHECK-AIX-64-P9-NEXT:    mtvsrwz v3, r4
 ; CHECK-AIX-64-P9-NEXT:    mtvsrwz v4, r3
 ; CHECK-AIX-64-P9-NEXT:    lxv v2, 0(r5)
@@ -1441,53 +1431,54 @@
 define <16 x i8> @test_v8i16_v4i32(<8 x i16> %a, <4 x i32> %b, i16 %arg, i32 %arg1) {
 ; CHECK-LE-P8-LABEL: test_v8i16_v4i32:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    mtfprd f0, r7
-; CHECK-LE-P8-NEXT:    mtfprd f1, r8
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
-; CHECK-LE-P8-NEXT:    vmrglb v2, v3, v2
+; CHECK-LE-P8-NEXT:    mtvsrd v2, r7
+; CHECK-LE-P8-NEXT:    mtvsrwz v3, r8
+; CHECK-LE-P8-NEXT:    vmrghb v2, v3, v2
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v8i16_v4i32:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    mtfprd f0, r7
-; CHECK-LE-P9-NEXT:    mtvsrws v3, r8
-; CHECK-LE-P9-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P9-NEXT:    vmrglb v2, v3, v2
+; CHECK-LE-P9-NEXT:    mtvsrd v2, r7
+; CHECK-LE-P9-NEXT:    mtvsrwz v3, r8
+; CHECK-LE-P9-NEXT:    vmrghb v2, v3, v2
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v8i16_v4i32:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    sldi r3, r7, 48
-; CHECK-BE-P8-NEXT:    sldi r4, r8, 32
-; CHECK-BE-P8-NEXT:    mtvsrd v2, r3
-; CHECK-BE-P8-NEXT:    mtvsrd v3, r4
-; CHECK-BE-P8-NEXT:    vmrghb v2, v2, v3
+; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI17_0@toc@ha
+; CHECK-BE-P8-NEXT:    mtvsrwz v2, r7
+; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI17_0@toc@l
+; CHECK-BE-P8-NEXT:    mtvsrwz v4, r8
+; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r3
+; CHECK-BE-P8-NEXT:    vperm v2, v2, v4, v3
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v8i16_v4i32:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    sldi r3, r7, 48
-; CHECK-BE-P9-NEXT:    mtvsrws v3, r8
-; CHECK-BE-P9-NEXT:    mtvsrd v2, r3
-; CHECK-BE-P9-NEXT:    vmrghb v2, v2, v3
+; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI17_0@toc@ha
+; CHECK-BE-P9-NEXT:    mtvsrwz v2, r7
+; CHECK-BE-P9-NEXT:    mtvsrwz v3, r8
+; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI17_0@toc@l
+; CHECK-BE-P9-NEXT:    lxv v4, 0(r3)
+; CHECK-BE-P9-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v8i16_v4i32:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 48
-; CHECK-AIX-64-P8-NEXT:    sldi r4, r4, 32
-; CHECK-AIX-64-P8-NEXT:    mtvsrd v2, r3
-; CHECK-AIX-64-P8-NEXT:    mtvsrd v3, r4
-; CHECK-AIX-64-P8-NEXT:    vmrghb v2, v2, v3
+; CHECK-AIX-64-P8-NEXT:    ld r5, L..C13(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v2, r3
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r4
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r5
+; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v4, v3
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v8i16_v4i32:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    sldi r3, r3, 48
-; CHECK-AIX-64-P9-NEXT:    mtvsrws v3, r4
-; CHECK-AIX-64-P9-NEXT:    mtvsrd v2, r3
-; CHECK-AIX-64-P9-NEXT:    vmrghb v2, v2, v3
+; CHECK-AIX-64-P9-NEXT:    mtvsrwz v2, r3
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C9(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    mtvsrwz v3, r4
+; CHECK-AIX-64-P9-NEXT:    lxv v4, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32:
@@ -1521,20 +1512,16 @@
 define <16 x i8> @test_v8i16_v2i64(<8 x i16> %a, <2 x i64> %b, i16 %arg, i64 %arg1) {
 ; CHECK-LE-P8-LABEL: test_v8i16_v2i64:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    mtfprd f0, r7
-; CHECK-LE-P8-NEXT:    mtfprd f1, r8
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
-; CHECK-LE-P8-NEXT:    vmrglb v2, v3, v2
+; CHECK-LE-P8-NEXT:    mtvsrd v2, r7
+; CHECK-LE-P8-NEXT:    mtvsrd v3, r8
+; CHECK-LE-P8-NEXT:    vmrghb v2, v3, v2
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v8i16_v2i64:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    mtfprd f0, r7
-; CHECK-LE-P9-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P9-NEXT:    mtfprd f0, r8
-; CHECK-LE-P9-NEXT:    xxswapd v3, vs0
-; CHECK-LE-P9-NEXT:    vmrglb v2, v3, v2
+; CHECK-LE-P9-NEXT:    mtvsrd v2, r7
+; CHECK-LE-P9-NEXT:    mtvsrd v3, r8
+; CHECK-LE-P9-NEXT:    vmrghb v2, v3, v2
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v8i16_v2i64:
@@ -1671,53 +1658,54 @@
 define <16 x i8> @test_v4i32_v8i16(i32 %arg, i16 %arg1) {
 ; CHECK-LE-P8-LABEL: test_v4i32_v8i16:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    mtfprd f1, r4
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
-; CHECK-LE-P8-NEXT:    vmrglh v2, v3, v2
+; CHECK-LE-P8-NEXT:    mtvsrwz v2, r3
+; CHECK-LE-P8-NEXT:    mtvsrd v3, r4
+; CHECK-LE-P8-NEXT:    vmrghh v2, v3, v2
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v4i32_v8i16:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    mtfprd f0, r4
-; CHECK-LE-P9-NEXT:    mtvsrws v2, r3
-; CHECK-LE-P9-NEXT:    xxswapd v3, vs0
-; CHECK-LE-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-LE-P9-NEXT:    mtvsrwz v2, r3
+; CHECK-LE-P9-NEXT:    mtvsrd v3, r4
+; CHECK-LE-P9-NEXT:    vmrghh v2, v3, v2
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v4i32_v8i16:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    sldi r3, r3, 32
-; CHECK-BE-P8-NEXT:    sldi r4, r4, 48
-; CHECK-BE-P8-NEXT:    mtvsrd v2, r3
-; CHECK-BE-P8-NEXT:    mtvsrd v3, r4
-; CHECK-BE-P8-NEXT:    vmrghh v2, v2, v3
+; CHECK-BE-P8-NEXT:    addis r5, r2, .LCPI20_0@toc@ha
+; CHECK-BE-P8-NEXT:    mtvsrwz v2, r3
+; CHECK-BE-P8-NEXT:    addi r5, r5, .LCPI20_0@toc@l
+; CHECK-BE-P8-NEXT:    mtvsrwz v4, r4
+; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r5
+; CHECK-BE-P8-NEXT:    vperm v2, v2, v4, v3
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v4i32_v8i16:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    mtvsrws v2, r3
-; CHECK-BE-P9-NEXT:    sldi r3, r4, 48
-; CHECK-BE-P9-NEXT:    mtvsrd v3, r3
-; CHECK-BE-P9-NEXT:    vmrghh v2, v2, v3
+; CHECK-BE-P9-NEXT:    mtvsrwz v2, r3
+; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI20_0@toc@ha
+; CHECK-BE-P9-NEXT:    mtvsrwz v3, r4
+; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI20_0@toc@l
+; CHECK-BE-P9-NEXT:    lxv v4, 0(r3)
+; CHECK-BE-P9-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_v8i16:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 32
-; CHECK-AIX-64-P8-NEXT:    sldi r4, r4, 48
-; CHECK-AIX-64-P8-NEXT:    mtvsrd v2, r3
-; CHECK-AIX-64-P8-NEXT:    mtvsrd v3, r4
-; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v2, v3
+; CHECK-AIX-64-P8-NEXT:    ld r5, L..C14(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v2, r3
+; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r4
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r5
+; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v4, v3
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v4i32_v8i16:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    mtvsrws v2, r3
-; CHECK-AIX-64-P9-NEXT:    sldi r3, r4, 48
-; CHECK-AIX-64-P9-NEXT:    mtvsrd v3, r3
-; CHECK-AIX-64-P9-NEXT:    vmrghh v2, v2, v3
+; CHECK-AIX-64-P9-NEXT:    mtvsrwz v2, r3
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C10(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    mtvsrwz v3, r4
+; CHECK-AIX-64-P9-NEXT:    lxv v4, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    vperm v2, v2, v3, v4
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16:
@@ -1826,18 +1814,15 @@
 ; CHECK-LE-P8-LABEL: test_v2i64_v4i32:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    mtfprd f1, r4
-; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-LE-P8-NEXT:    xxmrglw v2, vs1, vs0
+; CHECK-LE-P8-NEXT:    mtfprwz f1, r4
+; CHECK-LE-P8-NEXT:    xxmrghw v2, vs1, vs0
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v2i64_v4i32:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    mtfprd f0, r3
-; CHECK-LE-P9-NEXT:    mtvsrws vs1, r4
-; CHECK-LE-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P9-NEXT:    xxmrglw v2, vs1, vs0
+; CHECK-LE-P9-NEXT:    mtfprwz f1, r4
+; CHECK-LE-P9-NEXT:    xxmrghw v2, vs1, vs0
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v2i64_v4i32:
@@ -1901,20 +1886,16 @@
 define <16 x i8> @test_v2i64_v8i16(i64 %arg, i16 %arg1) {
 ; CHECK-LE-P8-LABEL: test_v2i64_v8i16:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    mtfprd f1, r4
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
-; CHECK-LE-P8-NEXT:    vmrglh v2, v3, v2
+; CHECK-LE-P8-NEXT:    mtvsrd v2, r3
+; CHECK-LE-P8-NEXT:    mtvsrd v3, r4
+; CHECK-LE-P8-NEXT:    vmrghh v2, v3, v2
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v2i64_v8i16:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    mtfprd f0, r3
-; CHECK-LE-P9-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P9-NEXT:    mtfprd f0, r4
-; CHECK-LE-P9-NEXT:    xxswapd v3, vs0
-; CHECK-LE-P9-NEXT:    vmrglh v2, v3, v2
+; CHECK-LE-P9-NEXT:    mtvsrd v2, r3
+; CHECK-LE-P9-NEXT:    mtvsrd v3, r4
+; CHECK-LE-P9-NEXT:    vmrghh v2, v3, v2
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v2i64_v8i16:
@@ -1981,25 +1962,21 @@
 ; CHECK-LE-P8-LABEL: test_v4i32_v2i64:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    addis r5, r2, .LCPI24_0@toc@ha
-; CHECK-LE-P8-NEXT:    lfiwzx f0, 0, r3
-; CHECK-LE-P8-NEXT:    lfdx f1, 0, r4
-; CHECK-LE-P8-NEXT:    addi r3, r5, .LCPI24_0@toc@l
-; CHECK-LE-P8-NEXT:    lxvd2x vs2, 0, r3
-; CHECK-LE-P8-NEXT:    xxswapd v2, f0
-; CHECK-LE-P8-NEXT:    xxswapd v3, f1
-; CHECK-LE-P8-NEXT:    xxswapd v4, vs2
+; CHECK-LE-P8-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-LE-P8-NEXT:    lxsdx v3, 0, r4
+; CHECK-LE-P8-NEXT:    addi r5, r5, .LCPI24_0@toc@l
+; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r5
+; CHECK-LE-P8-NEXT:    xxswapd v4, vs0
 ; CHECK-LE-P8-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v4i32_v2i64:
 ; CHECK-LE-P9:       # %bb.0: # %entry
-; CHECK-LE-P9-NEXT:    lfiwzx f0, 0, r3
+; CHECK-LE-P9-NEXT:    lxsiwzx v2, 0, r3
 ; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI24_0@toc@ha
+; CHECK-LE-P9-NEXT:    lxsd v3, 0(r4)
 ; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI24_0@toc@l
-; CHECK-LE-P9-NEXT:    xxswapd v2, f0
-; CHECK-LE-P9-NEXT:    lfd f0, 0(r4)
 ; CHECK-LE-P9-NEXT:    lxv v4, 0(r3)
-; CHECK-LE-P9-NEXT:    xxswapd v3, f0
 ; CHECK-LE-P9-NEXT:    vperm v2, v3, v2, v4
 ; CHECK-LE-P9-NEXT:    blr
 ;
@@ -2027,7 +2004,7 @@
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_v2i64:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    ld r5, L..C9(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    ld r5, L..C15(r2) # %const.0
 ; CHECK-AIX-64-P8-NEXT:    lfiwzx f0, 0, r3
 ; CHECK-AIX-64-P8-NEXT:    lxsdx v3, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    xxsldwi v2, f0, f0, 1
@@ -2038,7 +2015,7 @@
 ; CHECK-AIX-64-P9-LABEL: test_v4i32_v2i64:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    lfiwzx f0, 0, r3
-; CHECK-AIX-64-P9-NEXT:    ld r3, L..C5(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C11(r2) # %const.0
 ; CHECK-AIX-64-P9-NEXT:    lxsd v3, 0(r4)
 ; CHECK-AIX-64-P9-NEXT:    xxsldwi v2, f0, f0, 1
 ; CHECK-AIX-64-P9-NEXT:    lxv v4, 0(r3)
diff --git a/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll
--- a/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll
@@ -30,42 +30,42 @@
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
 ; CHECK-LE-P8-NEXT:    mtfprd f1, r4
-; CHECK-LE-P8-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-LE-P8-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v16i8_v16i8:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    mtfprd f0, r3
 ; CHECK-LE-P9-NEXT:    mtfprd f1, r4
-; CHECK-LE-P9-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-LE-P9-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v16i8_v16i8:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-BE-P8-NEXT:    mtfprwz f1, r4
-; CHECK-BE-P8-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-BE-P8-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v16i8_v16i8:
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-BE-P9-NEXT:    mtfprwz f1, r4
-; CHECK-BE-P9-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-BE-P9-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v16i8_v16i8:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f1, r4
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P8-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v16i8_v16i8:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-AIX-64-P9-NEXT:    mtfprwz f1, r4
-; CHECK-AIX-64-P9-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P9-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v16i8_v16i8:
@@ -102,42 +102,42 @@
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    xxmrgld v2, vs0, v2
+; CHECK-LE-P8-NEXT:    xxpermdi v2, vs0, v2, 1
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_none_v16i8:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-LE-P9-NEXT:    mtfprd f0, r3
-; CHECK-LE-P9-NEXT:    xxmrgld v2, vs0, v2
+; CHECK-LE-P9-NEXT:    xxpermdi v2, vs0, v2, 1
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_none_v16i8:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
-; CHECK-BE-P8-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-BE-P8-NEXT:    xxpermdi v2, v2, vs0, 1
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_none_v16i8:
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
-; CHECK-BE-P9-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-BE-P9-NEXT:    xxpermdi v2, v2, vs0, 1
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_none_v16i8:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P8-NEXT:    xxpermdi v2, v2, vs0, 1
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_none_v16i8:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
-; CHECK-AIX-64-P9-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P9-NEXT:    xxpermdi v2, v2, vs0, 1
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_none_v16i8:
@@ -170,42 +170,42 @@
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    xxmrgld v2, v2, vs0
+; CHECK-LE-P8-NEXT:    xxpermdi v2, v2, vs0, 2
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v16i8_none:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-LE-P9-NEXT:    mtfprd f0, r3
-; CHECK-LE-P9-NEXT:    xxmrgld v2, v2, vs0
+; CHECK-LE-P9-NEXT:    xxpermdi v2, v2, vs0, 2
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v16i8_none:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
-; CHECK-BE-P8-NEXT:    xxmrghd v2, vs0, v2
+; CHECK-BE-P8-NEXT:    xxpermdi v2, vs0, v2, 2
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v16i8_none:
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
-; CHECK-BE-P9-NEXT:    xxmrghd v2, vs0, v2
+; CHECK-BE-P9-NEXT:    xxpermdi v2, vs0, v2, 2
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v16i8_none:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs0, v2
+; CHECK-AIX-64-P8-NEXT:    xxpermdi v2, vs0, v2, 2
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v16i8_none:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
-; CHECK-AIX-64-P9-NEXT:    xxmrghd v2, vs0, v2
+; CHECK-AIX-64-P9-NEXT:    xxpermdi v2, vs0, v2, 2
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v16i8_none:
@@ -237,54 +237,42 @@
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
 ; CHECK-LE-P8-NEXT:    mtfprd f1, r4
-; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-LE-P8-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-LE-P8-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v16i8_v8i16:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    mtfprd f0, r3
 ; CHECK-LE-P9-NEXT:    mtfprd f1, r4
-; CHECK-LE-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P9-NEXT:    xxswapd vs1, vs1
-; CHECK-LE-P9-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-LE-P9-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v16i8_v8i16:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    sldi r3, r3, 56
-; CHECK-BE-P8-NEXT:    sldi r4, r4, 48
-; CHECK-BE-P8-NEXT:    mtfprd f0, r3
-; CHECK-BE-P8-NEXT:    mtfprd f1, r4
-; CHECK-BE-P8-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
+; CHECK-BE-P8-NEXT:    mtfprwz f1, r4
+; CHECK-BE-P8-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v16i8_v8i16:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    sldi r3, r3, 56
-; CHECK-BE-P9-NEXT:    mtfprd f0, r3
-; CHECK-BE-P9-NEXT:    sldi r3, r4, 48
-; CHECK-BE-P9-NEXT:    mtfprd f1, r3
-; CHECK-BE-P9-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
+; CHECK-BE-P9-NEXT:    mtfprwz f1, r4
+; CHECK-BE-P9-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v16i8_v8i16:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 56
-; CHECK-AIX-64-P8-NEXT:    sldi r4, r4, 48
-; CHECK-AIX-64-P8-NEXT:    mtfprd f0, r3
-; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r4
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P8-NEXT:    mtfprwz f1, r4
+; CHECK-AIX-64-P8-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v16i8_v8i16:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    sldi r3, r3, 56
-; CHECK-AIX-64-P9-NEXT:    mtfprd f0, r3
-; CHECK-AIX-64-P9-NEXT:    sldi r3, r4, 48
-; CHECK-AIX-64-P9-NEXT:    mtfprd f1, r3
-; CHECK-AIX-64-P9-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT:    mtfprwz f1, r4
+; CHECK-AIX-64-P9-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v16i8_v8i16:
@@ -320,54 +308,42 @@
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
 ; CHECK-LE-P8-NEXT:    mtfprd f1, r4
-; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-LE-P8-NEXT:    xxmrgld v2, vs0, vs1
+; CHECK-LE-P8-NEXT:    xxmrghd v2, vs0, vs1
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v8i16_v16i8:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    mtfprd f0, r3
 ; CHECK-LE-P9-NEXT:    mtfprd f1, r4
-; CHECK-LE-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P9-NEXT:    xxswapd vs1, vs1
-; CHECK-LE-P9-NEXT:    xxmrgld v2, vs0, vs1
+; CHECK-LE-P9-NEXT:    xxmrghd v2, vs0, vs1
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v8i16_v16i8:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    sldi r3, r3, 56
-; CHECK-BE-P8-NEXT:    sldi r4, r4, 48
-; CHECK-BE-P8-NEXT:    mtfprd f0, r3
-; CHECK-BE-P8-NEXT:    mtfprd f1, r4
-; CHECK-BE-P8-NEXT:    xxmrghd v2, vs1, vs0
+; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
+; CHECK-BE-P8-NEXT:    mtfprwz f1, r4
+; CHECK-BE-P8-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v8i16_v16i8:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    sldi r3, r3, 56
-; CHECK-BE-P9-NEXT:    mtfprd f0, r3
-; CHECK-BE-P9-NEXT:    sldi r3, r4, 48
-; CHECK-BE-P9-NEXT:    mtfprd f1, r3
-; CHECK-BE-P9-NEXT:    xxmrghd v2, vs1, vs0
+; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
+; CHECK-BE-P9-NEXT:    mtfprwz f1, r4
+; CHECK-BE-P9-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v8i16_v16i8:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 56
-; CHECK-AIX-64-P8-NEXT:    sldi r4, r4, 48
-; CHECK-AIX-64-P8-NEXT:    mtfprd f0, r3
-; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r4
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs1, vs0
+; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P8-NEXT:    mtfprwz f1, r4
+; CHECK-AIX-64-P8-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v8i16_v16i8:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    sldi r3, r3, 56
-; CHECK-AIX-64-P9-NEXT:    mtfprd f0, r3
-; CHECK-AIX-64-P9-NEXT:    sldi r3, r4, 48
-; CHECK-AIX-64-P9-NEXT:    mtfprd f1, r3
-; CHECK-AIX-64-P9-NEXT:    xxmrghd v2, vs1, vs0
+; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT:    mtfprwz f1, r4
+; CHECK-AIX-64-P9-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_v16i8:
@@ -404,42 +380,42 @@
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    xxmrgld v2, v2, vs0
+; CHECK-LE-P8-NEXT:    xxpermdi v2, v2, vs0, 2
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v8i16_none:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-LE-P9-NEXT:    mtfprd f0, r3
-; CHECK-LE-P9-NEXT:    xxmrgld v2, v2, vs0
+; CHECK-LE-P9-NEXT:    xxpermdi v2, v2, vs0, 2
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v8i16_none:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
-; CHECK-BE-P8-NEXT:    xxmrghd v2, vs0, v2
+; CHECK-BE-P8-NEXT:    xxpermdi v2, vs0, v2, 2
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v8i16_none:
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
-; CHECK-BE-P9-NEXT:    xxmrghd v2, vs0, v2
+; CHECK-BE-P9-NEXT:    xxpermdi v2, vs0, v2, 2
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v8i16_none:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs0, v2
+; CHECK-AIX-64-P8-NEXT:    xxpermdi v2, vs0, v2, 2
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v8i16_none:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
-; CHECK-AIX-64-P9-NEXT:    xxmrghd v2, vs0, v2
+; CHECK-AIX-64-P9-NEXT:    xxpermdi v2, vs0, v2, 2
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_none:
@@ -472,42 +448,42 @@
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    xxmrgld v2, vs0, v2
+; CHECK-LE-P8-NEXT:    xxpermdi v2, vs0, v2, 1
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_none_v8i16:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-LE-P9-NEXT:    mtfprd f0, r3
-; CHECK-LE-P9-NEXT:    xxmrgld v2, vs0, v2
+; CHECK-LE-P9-NEXT:    xxpermdi v2, vs0, v2, 1
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_none_v8i16:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
-; CHECK-BE-P8-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-BE-P8-NEXT:    xxpermdi v2, v2, vs0, 1
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_none_v8i16:
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
-; CHECK-BE-P9-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-BE-P9-NEXT:    xxpermdi v2, v2, vs0, 1
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_none_v8i16:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P8-NEXT:    xxpermdi v2, v2, vs0, 1
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_none_v8i16:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
-; CHECK-AIX-64-P9-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P9-NEXT:    xxpermdi v2, v2, vs0, 1
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_none_v8i16:
@@ -538,52 +514,43 @@
 ; CHECK-LE-P8-LABEL: test_v16i8_v4i32:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    mtfprd f1, r4
-; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-LE-P8-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-LE-P8-NEXT:    mtfprwz f1, r4
+; CHECK-LE-P8-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v16i8_v4i32:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    mtfprd f0, r3
-; CHECK-LE-P9-NEXT:    mtvsrws vs1, r4
-; CHECK-LE-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P9-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-LE-P9-NEXT:    mtfprwz f1, r4
+; CHECK-LE-P9-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v16i8_v4i32:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    sldi r3, r3, 56
-; CHECK-BE-P8-NEXT:    sldi r4, r4, 32
-; CHECK-BE-P8-NEXT:    mtfprd f0, r3
-; CHECK-BE-P8-NEXT:    mtfprd f1, r4
-; CHECK-BE-P8-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
+; CHECK-BE-P8-NEXT:    mtfprwz f1, r4
+; CHECK-BE-P8-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v16i8_v4i32:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    sldi r3, r3, 56
-; CHECK-BE-P9-NEXT:    mtvsrws vs1, r4
-; CHECK-BE-P9-NEXT:    mtfprd f0, r3
-; CHECK-BE-P9-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
+; CHECK-BE-P9-NEXT:    mtfprwz f1, r4
+; CHECK-BE-P9-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v16i8_v4i32:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 56
-; CHECK-AIX-64-P8-NEXT:    sldi r4, r4, 32
-; CHECK-AIX-64-P8-NEXT:    mtfprd f0, r3
-; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r4
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P8-NEXT:    mtfprwz f1, r4
+; CHECK-AIX-64-P8-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v16i8_v4i32:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    sldi r3, r3, 56
-; CHECK-AIX-64-P9-NEXT:    mtvsrws vs1, r4
-; CHECK-AIX-64-P9-NEXT:    mtfprd f0, r3
-; CHECK-AIX-64-P9-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT:    mtfprwz f1, r4
+; CHECK-AIX-64-P9-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v16i8_v4i32:
@@ -618,52 +585,43 @@
 ; CHECK-LE-P8-LABEL: test_v4i32_v16i8:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    mtfprd f1, r4
-; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-LE-P8-NEXT:    xxmrgld v2, vs0, vs1
+; CHECK-LE-P8-NEXT:    mtfprwz f1, r4
+; CHECK-LE-P8-NEXT:    xxmrghd v2, vs0, vs1
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v4i32_v16i8:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    mtfprd f0, r3
-; CHECK-LE-P9-NEXT:    mtvsrws vs1, r4
-; CHECK-LE-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P9-NEXT:    xxmrgld v2, vs0, vs1
+; CHECK-LE-P9-NEXT:    mtfprwz f1, r4
+; CHECK-LE-P9-NEXT:    xxmrghd v2, vs0, vs1
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v4i32_v16i8:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    sldi r3, r3, 56
-; CHECK-BE-P8-NEXT:    sldi r4, r4, 32
-; CHECK-BE-P8-NEXT:    mtfprd f0, r3
-; CHECK-BE-P8-NEXT:    mtfprd f1, r4
-; CHECK-BE-P8-NEXT:    xxmrghd v2, vs1, vs0
+; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
+; CHECK-BE-P8-NEXT:    mtfprwz f1, r4
+; CHECK-BE-P8-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v4i32_v16i8:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    sldi r3, r3, 56
-; CHECK-BE-P9-NEXT:    mtvsrws vs1, r4
-; CHECK-BE-P9-NEXT:    mtfprd f0, r3
-; CHECK-BE-P9-NEXT:    xxmrghd v2, vs1, vs0
+; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
+; CHECK-BE-P9-NEXT:    mtfprwz f1, r4
+; CHECK-BE-P9-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_v16i8:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 56
-; CHECK-AIX-64-P8-NEXT:    sldi r4, r4, 32
-; CHECK-AIX-64-P8-NEXT:    mtfprd f0, r3
-; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r4
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs1, vs0
+; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P8-NEXT:    mtfprwz f1, r4
+; CHECK-AIX-64-P8-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v4i32_v16i8:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    sldi r3, r3, 56
-; CHECK-AIX-64-P9-NEXT:    mtvsrws vs1, r4
-; CHECK-AIX-64-P9-NEXT:    mtfprd f0, r3
-; CHECK-AIX-64-P9-NEXT:    xxmrghd v2, vs1, vs0
+; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT:    mtfprwz f1, r4
+; CHECK-AIX-64-P9-NEXT:    xxmrgld v2, vs1, vs0
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v16i8:
@@ -700,42 +658,42 @@
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-LE-P8-NEXT:    mtfprwz f0, r3
-; CHECK-LE-P8-NEXT:    xxmrgld v2, vs0, v2
+; CHECK-LE-P8-NEXT:    xxpermdi v2, vs0, v2, 1
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_none_v4i32:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-LE-P9-NEXT:    mtfprwz f0, r3
-; CHECK-LE-P9-NEXT:    xxmrgld v2, vs0, v2
+; CHECK-LE-P9-NEXT:    xxpermdi v2, vs0, v2, 1
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_none_v4i32:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
-; CHECK-BE-P8-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-BE-P8-NEXT:    xxpermdi v2, v2, vs0, 1
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_none_v4i32:
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
-; CHECK-BE-P9-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-BE-P9-NEXT:    xxpermdi v2, v2, vs0, 1
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_none_v4i32:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P8-NEXT:    xxpermdi v2, v2, vs0, 1
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_none_v4i32:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
-; CHECK-AIX-64-P9-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P9-NEXT:    xxpermdi v2, v2, vs0, 1
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_none_v4i32:
@@ -768,42 +726,42 @@
 ; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
 ; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
 ; CHECK-LE-P8-NEXT:    mtfprwz f0, r3
-; CHECK-LE-P8-NEXT:    xxmrgld v2, v2, vs0
+; CHECK-LE-P8-NEXT:    xxpermdi v2, v2, vs0, 2
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v4i32_none:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-LE-P9-NEXT:    mtfprwz f0, r3
-; CHECK-LE-P9-NEXT:    xxmrgld v2, v2, vs0
+; CHECK-LE-P9-NEXT:    xxpermdi v2, v2, vs0, 2
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v4i32_none:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
-; CHECK-BE-P8-NEXT:    xxmrghd v2, vs0, v2
+; CHECK-BE-P8-NEXT:    xxpermdi v2, vs0, v2, 2
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v4i32_none:
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
-; CHECK-BE-P9-NEXT:    xxmrghd v2, vs0, v2
+; CHECK-BE-P9-NEXT:    xxpermdi v2, vs0, v2, 2
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_none:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    lxvd2x v2, 0, r4
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs0, v2
+; CHECK-AIX-64-P8-NEXT:    xxpermdi v2, vs0, v2, 2
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v4i32_none:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    lxv v2, 0(r4)
 ; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
-; CHECK-AIX-64-P9-NEXT:    xxmrghd v2, vs0, v2
+; CHECK-AIX-64-P9-NEXT:    xxpermdi v2, vs0, v2, 2
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_none:
@@ -835,18 +793,14 @@
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
 ; CHECK-LE-P8-NEXT:    mtfprd f1, r4
-; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs1
-; CHECK-LE-P8-NEXT:    xxmrgld v2, v2, vs0
+; CHECK-LE-P8-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v16i8_v2i64:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    mtfprd f0, r3
 ; CHECK-LE-P9-NEXT:    mtfprd f1, r4
-; CHECK-LE-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P9-NEXT:    xxswapd v2, vs1
-; CHECK-LE-P9-NEXT:    xxmrgld v2, v2, vs0
+; CHECK-LE-P9-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v16i8_v2i64:
@@ -923,18 +877,14 @@
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
 ; CHECK-LE-P8-NEXT:    mtfprd f1, r4
-; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs1
-; CHECK-LE-P8-NEXT:    xxmrgld v2, vs0, v2
+; CHECK-LE-P8-NEXT:    xxmrghd v2, vs0, vs1
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v2i64_v16i8:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    mtfprd f0, r3
 ; CHECK-LE-P9-NEXT:    mtfprd f1, r4
-; CHECK-LE-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P9-NEXT:    xxswapd v2, vs1
-; CHECK-LE-P9-NEXT:    xxmrgld v2, vs0, v2
+; CHECK-LE-P9-NEXT:    xxmrghd v2, vs0, vs1
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v2i64_v16i8:
@@ -942,14 +892,14 @@
 ; CHECK-BE-P8-NEXT:    mtfprd f0, r4
 ; CHECK-BE-P8-NEXT:    xxspltd v2, vs0, 0
 ; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
-; CHECK-BE-P8-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-BE-P8-NEXT:    xxmrgld v2, v2, vs0
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v2i64_v16i8:
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-BE-P9-NEXT:    mtvsrdd v2, r4, r4
-; CHECK-BE-P9-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-BE-P9-NEXT:    xxmrgld v2, v2, vs0
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v2i64_v16i8:
@@ -957,14 +907,14 @@
 ; CHECK-AIX-64-P8-NEXT:    mtfprd f0, r4
 ; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs0, vs0
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P8-NEXT:    xxmrgld v2, v2, vs0
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v2i64_v16i8:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-AIX-64-P9-NEXT:    mtvsrdd v2, r4, r4
-; CHECK-AIX-64-P9-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P9-NEXT:    xxmrgld v2, v2, vs0
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_v16i8:
@@ -1159,42 +1109,42 @@
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
 ; CHECK-LE-P8-NEXT:    mtfprd f1, r4
-; CHECK-LE-P8-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-LE-P8-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v8i16_v8i16:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    mtfprd f0, r3
 ; CHECK-LE-P9-NEXT:    mtfprd f1, r4
-; CHECK-LE-P9-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-LE-P9-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v8i16_v8i16:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-BE-P8-NEXT:    mtfprwz f1, r4
-; CHECK-BE-P8-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-BE-P8-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v8i16_v8i16:
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-BE-P9-NEXT:    mtfprwz f1, r4
-; CHECK-BE-P9-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-BE-P9-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v8i16_v8i16:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f1, r4
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P8-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v8i16_v8i16:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-AIX-64-P9-NEXT:    mtfprwz f1, r4
-; CHECK-AIX-64-P9-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P9-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_v8i16:
@@ -1229,52 +1179,43 @@
 ; CHECK-LE-P8-LABEL: test_v8i16_v4i32:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    mtfprd f1, r4
-; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-LE-P8-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-LE-P8-NEXT:    mtfprwz f1, r4
+; CHECK-LE-P8-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v8i16_v4i32:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    mtfprd f0, r3
-; CHECK-LE-P9-NEXT:    mtvsrws vs1, r4
-; CHECK-LE-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P9-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-LE-P9-NEXT:    mtfprwz f1, r4
+; CHECK-LE-P9-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v8i16_v4i32:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    sldi r3, r3, 48
-; CHECK-BE-P8-NEXT:    sldi r4, r4, 32
-; CHECK-BE-P8-NEXT:    mtfprd f0, r3
-; CHECK-BE-P8-NEXT:    mtfprd f1, r4
-; CHECK-BE-P8-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
+; CHECK-BE-P8-NEXT:    mtfprwz f1, r4
+; CHECK-BE-P8-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v8i16_v4i32:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    sldi r3, r3, 48
-; CHECK-BE-P9-NEXT:    mtvsrws vs1, r4
-; CHECK-BE-P9-NEXT:    mtfprd f0, r3
-; CHECK-BE-P9-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
+; CHECK-BE-P9-NEXT:    mtfprwz f1, r4
+; CHECK-BE-P9-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v8i16_v4i32:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 48
-; CHECK-AIX-64-P8-NEXT:    sldi r4, r4, 32
-; CHECK-AIX-64-P8-NEXT:    mtfprd f0, r3
-; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r4
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P8-NEXT:    mtfprwz f1, r4
+; CHECK-AIX-64-P8-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v8i16_v4i32:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    sldi r3, r3, 48
-; CHECK-AIX-64-P9-NEXT:    mtvsrws vs1, r4
-; CHECK-AIX-64-P9-NEXT:    mtfprd f0, r3
-; CHECK-AIX-64-P9-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT:    mtfprwz f1, r4
+; CHECK-AIX-64-P9-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32:
@@ -1310,18 +1251,14 @@
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
 ; CHECK-LE-P8-NEXT:    mtfprd f1, r4
-; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs1
-; CHECK-LE-P8-NEXT:    xxmrgld v2, v2, vs0
+; CHECK-LE-P8-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v8i16_v2i64:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    mtfprd f0, r3
 ; CHECK-LE-P9-NEXT:    mtfprd f1, r4
-; CHECK-LE-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P9-NEXT:    xxswapd v2, vs1
-; CHECK-LE-P9-NEXT:    xxmrgld v2, v2, vs0
+; CHECK-LE-P9-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v8i16_v2i64:
@@ -1398,42 +1335,42 @@
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-LE-P8-NEXT:    mtfprwz f1, r4
-; CHECK-LE-P8-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-LE-P8-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v4i32_v4i32:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-LE-P9-NEXT:    mtfprwz f1, r4
-; CHECK-LE-P9-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-LE-P9-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v4i32_v4i32:
 ; CHECK-BE-P8:       # %bb.0: # %entry
 ; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-BE-P8-NEXT:    mtfprwz f1, r4
-; CHECK-BE-P8-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-BE-P8-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v4i32_v4i32:
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-BE-P9-NEXT:    mtfprwz f1, r4
-; CHECK-BE-P9-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-BE-P9-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_v4i32:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f1, r4
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P8-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v4i32_v4i32:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-AIX-64-P9-NEXT:    mtfprwz f1, r4
-; CHECK-AIX-64-P9-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P9-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v4i32:
@@ -1467,53 +1404,44 @@
 define <2 x i64> @test_v4i32_v8i16(i32 %arg1, i16 %arg) {
 ; CHECK-LE-P8-LABEL: test_v4i32_v8i16:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    mtfprd f0, r3
+; CHECK-LE-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-LE-P8-NEXT:    mtfprd f1, r4
-; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT:    xxswapd vs1, vs1
-; CHECK-LE-P8-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-LE-P8-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v4i32_v8i16:
 ; CHECK-LE-P9:       # %bb.0: # %entry
+; CHECK-LE-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-LE-P9-NEXT:    mtfprd f1, r4
-; CHECK-LE-P9-NEXT:    mtvsrws vs0, r3
-; CHECK-LE-P9-NEXT:    xxswapd vs1, vs1
-; CHECK-LE-P9-NEXT:    xxmrgld v2, vs1, vs0
+; CHECK-LE-P9-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v4i32_v8i16:
 ; CHECK-BE-P8:       # %bb.0: # %entry
-; CHECK-BE-P8-NEXT:    sldi r3, r3, 32
-; CHECK-BE-P8-NEXT:    sldi r4, r4, 48
-; CHECK-BE-P8-NEXT:    mtfprd f0, r3
-; CHECK-BE-P8-NEXT:    mtfprd f1, r4
-; CHECK-BE-P8-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-BE-P8-NEXT:    mtfprwz f0, r3
+; CHECK-BE-P8-NEXT:    mtfprwz f1, r4
+; CHECK-BE-P8-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v4i32_v8i16:
 ; CHECK-BE-P9:       # %bb.0: # %entry
-; CHECK-BE-P9-NEXT:    mtvsrws vs0, r3
-; CHECK-BE-P9-NEXT:    sldi r3, r4, 48
-; CHECK-BE-P9-NEXT:    mtfprd f1, r3
-; CHECK-BE-P9-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-BE-P9-NEXT:    mtfprwz f0, r3
+; CHECK-BE-P9-NEXT:    mtfprwz f1, r4
+; CHECK-BE-P9-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v4i32_v8i16:
 ; CHECK-AIX-64-P8:       # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 32
-; CHECK-AIX-64-P8-NEXT:    sldi r4, r4, 48
-; CHECK-AIX-64-P8-NEXT:    mtfprd f0, r3
-; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r4
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P8-NEXT:    mtfprwz f1, r4
+; CHECK-AIX-64-P8-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v4i32_v8i16:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT:    mtvsrws vs0, r3
-; CHECK-AIX-64-P9-NEXT:    sldi r3, r4, 48
-; CHECK-AIX-64-P9-NEXT:    mtfprd f1, r3
-; CHECK-AIX-64-P9-NEXT:    xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT:    mtfprwz f1, r4
+; CHECK-AIX-64-P9-NEXT:    xxmrgld v2, vs0, vs1
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16:
@@ -1547,19 +1475,16 @@
 define <2 x i64> @test_v4i32_v2i64(i32 %arg1, i64 %arg) {
 ; CHECK-LE-P8-LABEL: test_v4i32_v2i64:
 ; CHECK-LE-P8:       # %bb.0: # %entry
-; CHECK-LE-P8-NEXT:    mtfprd f0, r3
+; CHECK-LE-P8-NEXT:    mtfprwz f0, r3
 ; CHECK-LE-P8-NEXT:    mtfprd f1, r4
-; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs1
-; CHECK-LE-P8-NEXT:    xxmrgld v2, v2, vs0
+; CHECK-LE-P8-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v4i32_v2i64:
 ; CHECK-LE-P9:       # %bb.0: # %entry
+; CHECK-LE-P9-NEXT:    mtfprwz f0, r3
 ; CHECK-LE-P9-NEXT:    mtfprd f1, r4
-; CHECK-LE-P9-NEXT:    mtvsrws vs0, r3
-; CHECK-LE-P9-NEXT:    xxswapd v2, vs1
-; CHECK-LE-P9-NEXT:    xxmrgld v2, v2, vs0
+; CHECK-LE-P9-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v4i32_v2i64:
@@ -1744,18 +1669,15 @@
 ; CHECK-LE-P8-LABEL: test_v2i64_v4i32:
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
-; CHECK-LE-P8-NEXT:    mtfprd f1, r4
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    xxswapd vs0, vs1
-; CHECK-LE-P8-NEXT:    xxmrgld v2, vs0, v2
+; CHECK-LE-P8-NEXT:    mtfprwz f1, r4
+; CHECK-LE-P8-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v2i64_v4i32:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    mtfprd f0, r3
-; CHECK-LE-P9-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P9-NEXT:    mtvsrws vs0, r4
-; CHECK-LE-P9-NEXT:    xxmrgld v2, vs0, v2
+; CHECK-LE-P9-NEXT:    mtfprwz f1, r4
+; CHECK-LE-P9-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v2i64_v4i32:
@@ -1763,14 +1685,14 @@
 ; CHECK-BE-P8-NEXT:    mtfprd f0, r3
 ; CHECK-BE-P8-NEXT:    xxspltd v2, vs0, 0
 ; CHECK-BE-P8-NEXT:    mtfprwz f0, r4
-; CHECK-BE-P8-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-BE-P8-NEXT:    xxmrgld v2, v2, vs0
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v2i64_v4i32:
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    mtfprwz f0, r4
 ; CHECK-BE-P9-NEXT:    mtvsrdd v2, r3, r3
-; CHECK-BE-P9-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-BE-P9-NEXT:    xxmrgld v2, v2, vs0
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v2i64_v4i32:
@@ -1778,14 +1700,14 @@
 ; CHECK-AIX-64-P8-NEXT:    mtfprd f0, r3
 ; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs0, vs0
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r4
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P8-NEXT:    xxmrgld v2, v2, vs0
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v2i64_v4i32:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r4
 ; CHECK-AIX-64-P9-NEXT:    mtvsrdd v2, r3, r3
-; CHECK-AIX-64-P9-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P9-NEXT:    xxmrgld v2, v2, vs0
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_v4i32:
@@ -1828,18 +1750,14 @@
 ; CHECK-LE-P8:       # %bb.0: # %entry
 ; CHECK-LE-P8-NEXT:    mtfprd f0, r3
 ; CHECK-LE-P8-NEXT:    mtfprd f1, r4
-; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P8-NEXT:    xxswapd vs0, vs1
-; CHECK-LE-P8-NEXT:    xxmrgld v2, vs0, v2
+; CHECK-LE-P8-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P8-NEXT:    blr
 ;
 ; CHECK-LE-P9-LABEL: test_v2i64_v8i16:
 ; CHECK-LE-P9:       # %bb.0: # %entry
 ; CHECK-LE-P9-NEXT:    mtfprd f0, r3
-; CHECK-LE-P9-NEXT:    xxswapd v2, vs0
-; CHECK-LE-P9-NEXT:    mtfprd f0, r4
-; CHECK-LE-P9-NEXT:    xxswapd vs0, vs0
-; CHECK-LE-P9-NEXT:    xxmrgld v2, vs0, v2
+; CHECK-LE-P9-NEXT:    mtfprd f1, r4
+; CHECK-LE-P9-NEXT:    xxmrghd v2, vs1, vs0
 ; CHECK-LE-P9-NEXT:    blr
 ;
 ; CHECK-BE-P8-LABEL: test_v2i64_v8i16:
@@ -1847,14 +1765,14 @@
 ; CHECK-BE-P8-NEXT:    mtfprd f0, r3
 ; CHECK-BE-P8-NEXT:    xxspltd v2, vs0, 0
 ; CHECK-BE-P8-NEXT:    mtfprwz f0, r4
-; CHECK-BE-P8-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-BE-P8-NEXT:    xxmrgld v2, v2, vs0
 ; CHECK-BE-P8-NEXT:    blr
 ;
 ; CHECK-BE-P9-LABEL: test_v2i64_v8i16:
 ; CHECK-BE-P9:       # %bb.0: # %entry
 ; CHECK-BE-P9-NEXT:    mtfprwz f0, r4
 ; CHECK-BE-P9-NEXT:    mtvsrdd v2, r3, r3
-; CHECK-BE-P9-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-BE-P9-NEXT:    xxmrgld v2, v2, vs0
 ; CHECK-BE-P9-NEXT:    blr
 ;
 ; CHECK-AIX-64-P8-LABEL: test_v2i64_v8i16:
@@ -1862,14 +1780,14 @@
 ; CHECK-AIX-64-P8-NEXT:    mtfprd f0, r3
 ; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, vs0, vs0
 ; CHECK-AIX-64-P8-NEXT:    mtfprwz f0, r4
-; CHECK-AIX-64-P8-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P8-NEXT:    xxmrgld v2, v2, vs0
 ; CHECK-AIX-64-P8-NEXT:    blr
 ;
 ; CHECK-AIX-64-P9-LABEL: test_v2i64_v8i16:
 ; CHECK-AIX-64-P9:       # %bb.0: # %entry
 ; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r4
 ; CHECK-AIX-64-P9-NEXT:    mtvsrdd v2, r3, r3
-; CHECK-AIX-64-P9-NEXT:    xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P9-NEXT:    xxmrgld v2, v2, vs0
 ; CHECK-AIX-64-P9-NEXT:    blr
 ;
 ; CHECK-AIX-32-P8-LABEL: test_v2i64_v8i16: