Index: llvm/lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.h +++ llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -123,6 +123,7 @@ /// XXPERMDI - The PPC XXPERMDI instruction /// XXPERMDI, + XXPERM, /// The CMPB instruction (takes two operands of i32 or i64). CMPB, @@ -1285,6 +1286,8 @@ SDValue LowerFunnelShift(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVPERM(SDValue Op, SelectionDAG &DAG, SDValue VPermMask, + SDValue V1, SDValue V2) const; SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -176,7 +176,8 @@ STATISTIC(NumSiblingCalls, "Number of sibling calls"); STATISTIC(NumOfVecPairStores, "Number of vector paired stores"); STATISTIC(NumOfVecPairLoads, "Number of vector paired loads"); -STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM"); +STATISTIC(ShufflesHandledWithVPERM, + "Number of shuffles lowered to a VPERM or XXPERM"); STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed"); static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int); @@ -1708,6 +1709,7 @@ return "PPCISD::XXSPLTI32DX"; case PPCISD::VECINSERT: return "PPCISD::VECINSERT"; case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI"; + case PPCISD::XXPERM: return "PPCISD::XXPERM"; case PPCISD::VECSHL: return "PPCISD::VECSHL"; case PPCISD::CMPB: return "PPCISD::CMPB"; case PPCISD::Hi: return "PPCISD::Hi"; @@ -10175,12 +10177,41 @@ LLVM_DEBUG(dbgs() << "With the following permute control vector:\n"); LLVM_DEBUG(VPermMask.dump()); - if (isLittleEndian) - return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), - V2, V1, VPermMask); - else - return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), - V1, V2, VPermMask); + return LowerVPERM(Op, DAG, VPermMask, V1, V2); +} + +SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG, + SDValue VPermMask, SDValue V1, + SDValue V2) const { + unsigned Opcode = PPCISD::VPERM; + auto ValType = V1.getValueType(); + SDLoc dl(Op); + + if (Subtarget.isLittleEndian()) + std::swap(V1, V2); + + if (Subtarget.isISA3_0() && V1->hasOneUse()) { + LLVM_DEBUG( + dbgs() + << "At least one of two input vector is dead - using XXPERM instead\n"); + Opcode = PPCISD::XXPERM; + + // TODO along with future XXSWAPD patch + // If V2 is dead and V1 is not, swap them + // but also will need to fix up the mask + if (ValType != MVT::v2f64) + V1 = DAG.getBitcast(MVT::v2f64, V1); + if (V2.getValueType() != MVT::v2f64) + V2 = DAG.getBitcast(MVT::v2f64, V2); + } + + SDValue VPERMNode = + DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask); + + if (ValType != MVT::v2f64 && Opcode == PPCISD::XXPERM) + VPERMNode = DAG.getBitcast(ValType, VPERMNode); + + return VPERMNode; } /// getVectorCompareInfo - Given an intrinsic, return false if it is not a Index: llvm/lib/Target/PowerPC/PPCInstrVSX.td =================================================================== --- llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -141,6 +141,10 @@ def PPCSToV : SDNode<"PPCISD::SCALAR_TO_VECTOR_PERMUTED", SDTypeProfile<1, 1, []>, []>; +def SDT_PPCxxperm : SDTypeProfile<1, 3, + [SDTCisVT<0, v2f64>, SDTCisVT<1, v2f64>, + SDTCisVT<2, v2f64>, SDTCisVT<3, v16i8>]>; +def PPCxxperm : SDNode<"PPCISD::XXPERM", SDT_PPCxxperm, []>; //-------------------------- Predicate definitions ---------------------------// def HasVSX : Predicate<"Subtarget->hasVSX()">; def IsLittleEndian : Predicate<"Subtarget->isLittleEndian()">; @@ -1655,10 +1659,14 @@ // Vector Permute // FIXME: Setting the hasSideEffects flag here to match current behaviour. let hasSideEffects = 1 in { - def XXPERM : XX3_XT5_XA5_XB5<60, 26, "xxperm" , vsrc, vsrc, vsrc, - IIC_VecPerm, []>; - def XXPERMR : XX3_XT5_XA5_XB5<60, 58, "xxpermr", vsrc, vsrc, vsrc, - IIC_VecPerm, []>; + def XXPERM : XX3Form<60, 26, (outs vsrc:$XT), + (ins vsrc:$XA, vsrc:$XTi, vsrc:$XB), + "xxperm $XT, $XA, $XB", IIC_VecPerm, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">; + def XXPERMR : XX3Form<60, 58, (outs vsrc:$XT), + (ins vsrc:$XA, vsrc:$XTi, vsrc:$XB), + "xxpermr $XT, $XA, $XB", IIC_VecPerm, []>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">; } // Vector Splat Immediate Byte @@ -4130,6 +4138,8 @@ (v4f32 (LXVWSX ForceXForm:$A))>; def : Pat<(v4i32 (PPCldsplat ForceXForm:$A)), (v4i32 (LXVWSX ForceXForm:$A))>; +def : Pat<(v2f64 (PPCxxperm v2f64:$XT, v2f64:$XB, (v16i8 (bitconvert v4i32:$C)))), + (XXPERM v2f64:$XT, v2f64:$XB, v4i32:$C)>; } // HasVSX, HasP9Vector // Any Power9 VSX subtarget with equivalent length but better Power10 VSX Index: llvm/test/CodeGen/AArch64/srem-vector-lkk.ll =================================================================== --- llvm/test/CodeGen/AArch64/srem-vector-lkk.ll +++ llvm/test/CodeGen/AArch64/srem-vector-lkk.ll @@ -98,7 +98,6 @@ ret <4 x i16> %1 } - ; Don't fold if we can combine srem with sdiv. define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) { ; CHECK-LABEL: combine_srem_sdiv: Index: llvm/test/CodeGen/PowerPC/aix-p9-insert-extract.ll =================================================================== --- llvm/test/CodeGen/PowerPC/aix-p9-insert-extract.ll +++ llvm/test/CodeGen/PowerPC/aix-p9-insert-extract.ll @@ -442,28 +442,18 @@ ; shufflevector. If halfword element 3 in BE mode(or 4 in LE mode) is the one ; we're attempting to insert, then we can use the vector insert instruction define <8 x i16> @shuffle_vector_halfword_0_4(<8 x i16> %a) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vinserth 2, 2, 14 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_halfword_0_4: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: addis 3, 2, .LCPI16_0@toc@ha -; CHECK-BE-NEXT: addi 3, 3, .LCPI16_0@toc@l -; CHECK-BE-NEXT: lxvx 35, 0, 3 -; CHECK-BE-NEXT: vperm 2, 2, 2, 3 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_halfword_0_4: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: ld 3, L..C0(2) -; CHECK-64-NEXT: lxv 35, 0(3) -; CHECK-64-NEXT: vperm 2, 2, 2, 3 +; CHECK-64-NEXT: ld 3, L..C0(2) # %const.0 +; CHECK-64-NEXT: lxv 0, 0(3) +; CHECK-64-NEXT: xxperm 34, 34, 0 ; CHECK-64-NEXT: blr ; ; CHECK-32-LABEL: shuffle_vector_halfword_0_4: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: lwz 3, L..C0(2) -; CHECK-32-NEXT: lxv 35, 0(3) -; CHECK-32-NEXT: vperm 2, 2, 2, 3 +; CHECK-32-NEXT: lwz 3, L..C0(2) # %const.0 +; CHECK-32-NEXT: lxv 0, 0(3) +; CHECK-32-NEXT: xxperm 34, 34, 0 ; CHECK-32-NEXT: blr entry: %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> @@ -471,16 +461,6 @@ } define <8 x i16> @shuffle_vector_halfword_1_3(<8 x i16> %a) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addis 3, 2, .LCPI17_0@toc@ha -; CHECK-NEXT: addi 3, 3, .LCPI17_0@toc@l -; CHECK-NEXT: lxvx 35, 0, 3 -; CHECK-NEXT: vperm 2, 2, 2, 3 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_halfword_1_3: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vinserth 2, 2, 2 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_halfword_1_3: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: vinserth 2, 2, 2 @@ -496,16 +476,6 @@ } define <8 x i16> @shuffle_vector_halfword_2_3(<8 x i16> %a) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addis 3, 2, .LCPI18_0@toc@ha -; CHECK-NEXT: addi 3, 3, .LCPI18_0@toc@l -; CHECK-NEXT: lxvx 35, 0, 3 -; CHECK-NEXT: vperm 2, 2, 2, 3 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_halfword_2_3: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vinserth 2, 2, 4 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_halfword_2_3: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: vinserth 2, 2, 4 @@ -521,28 +491,18 @@ } define <8 x i16> @shuffle_vector_halfword_3_4(<8 x i16> %a) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vinserth 2, 2, 8 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_halfword_3_4: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: addis 3, 2, .LCPI19_0@toc@ha -; CHECK-BE-NEXT: addi 3, 3, .LCPI19_0@toc@l -; CHECK-BE-NEXT: lxvx 35, 0, 3 -; CHECK-BE-NEXT: vperm 2, 2, 2, 3 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_halfword_3_4: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: ld 3, L..C1(2) -; CHECK-64-NEXT: lxv 35, 0(3) -; CHECK-64-NEXT: vperm 2, 2, 2, 3 +; CHECK-64-NEXT: ld 3, L..C1(2) # %const.0 +; CHECK-64-NEXT: lxv 0, 0(3) +; CHECK-64-NEXT: xxperm 34, 34, 0 ; CHECK-64-NEXT: blr ; ; CHECK-32-LABEL: shuffle_vector_halfword_3_4: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: lwz 3, L..C1(2) -; CHECK-32-NEXT: lxv 35, 0(3) -; CHECK-32-NEXT: vperm 2, 2, 2, 3 +; CHECK-32-NEXT: lwz 3, L..C1(2) # %const.0 +; CHECK-32-NEXT: lxv 0, 0(3) +; CHECK-32-NEXT: xxperm 34, 34, 0 ; CHECK-32-NEXT: blr entry: %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> @@ -550,16 +510,6 @@ } define <8 x i16> @shuffle_vector_halfword_4_3(<8 x i16> %a) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addis 3, 2, .LCPI20_0@toc@ha -; CHECK-NEXT: addi 3, 3, .LCPI20_0@toc@l -; CHECK-NEXT: lxvx 35, 0, 3 -; CHECK-NEXT: vperm 2, 2, 2, 3 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_halfword_4_3: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vinserth 2, 2, 8 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_halfword_4_3: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: vinserth 2, 2, 8 @@ -575,16 +525,6 @@ } define <8 x i16> @shuffle_vector_halfword_5_3(<8 x i16> %a) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addis 3, 2, .LCPI21_0@toc@ha -; CHECK-NEXT: addi 3, 3, .LCPI21_0@toc@l -; CHECK-NEXT: lxvx 35, 0, 3 -; CHECK-NEXT: vperm 2, 2, 2, 3 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_halfword_5_3: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vinserth 2, 2, 10 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_halfword_5_3: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: vinserth 2, 2, 10 @@ -600,28 +540,18 @@ } define <8 x i16> @shuffle_vector_halfword_6_4(<8 x i16> %a) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vinserth 2, 2, 2 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_halfword_6_4: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: addis 3, 2, .LCPI22_0@toc@ha -; CHECK-BE-NEXT: addi 3, 3, .LCPI22_0@toc@l -; CHECK-BE-NEXT: lxvx 35, 0, 3 -; CHECK-BE-NEXT: vperm 2, 2, 2, 3 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_halfword_6_4: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: ld 3, L..C2(2) -; CHECK-64-NEXT: lxv 35, 0(3) -; CHECK-64-NEXT: vperm 2, 2, 2, 3 +; CHECK-64-NEXT: ld 3, L..C2(2) # %const.0 +; CHECK-64-NEXT: lxv 0, 0(3) +; CHECK-64-NEXT: xxperm 34, 34, 0 ; CHECK-64-NEXT: blr ; ; CHECK-32-LABEL: shuffle_vector_halfword_6_4: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: lwz 3, L..C2(2) -; CHECK-32-NEXT: lxv 35, 0(3) -; CHECK-32-NEXT: vperm 2, 2, 2, 3 +; CHECK-32-NEXT: lwz 3, L..C2(2) # %const.0 +; CHECK-32-NEXT: lxv 0, 0(3) +; CHECK-32-NEXT: xxperm 34, 34, 0 ; CHECK-32-NEXT: blr entry: %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> @@ -629,28 +559,18 @@ } define <8 x i16> @shuffle_vector_halfword_7_4(<8 x i16> %a) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vinserth 2, 2, 0 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_halfword_7_4: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: addis 3, 2, .LCPI23_0@toc@ha -; CHECK-BE-NEXT: addi 3, 3, .LCPI23_0@toc@l -; CHECK-BE-NEXT: lxvx 35, 0, 3 -; CHECK-BE-NEXT: vperm 2, 2, 2, 3 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_halfword_7_4: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: ld 3, L..C3(2) -; CHECK-64-NEXT: lxv 35, 0(3) -; CHECK-64-NEXT: vperm 2, 2, 2, 3 +; CHECK-64-NEXT: ld 3, L..C3(2) # %const.0 +; CHECK-64-NEXT: lxv 0, 0(3) +; CHECK-64-NEXT: xxperm 34, 34, 0 ; CHECK-64-NEXT: blr ; ; CHECK-32-LABEL: shuffle_vector_halfword_7_4: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: lwz 3, L..C3(2) -; CHECK-32-NEXT: lxv 35, 0(3) -; CHECK-32-NEXT: vperm 2, 2, 2, 3 +; CHECK-32-NEXT: lwz 3, L..C3(2) # %const.0 +; CHECK-32-NEXT: lxv 0, 0(3) +; CHECK-32-NEXT: xxperm 34, 34, 0 ; CHECK-32-NEXT: blr entry: %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> @@ -660,15 +580,6 @@ ; The following testcases take one byte element from the second vector and ; inserts it at various locations in the first vector define <16 x i8> @shuffle_vector_byte_0_16(<16 x i8> %a, <16 x i8> %b) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 3, 3, 3, 8 -; CHECK-NEXT: vinsertb 2, 3, 15 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_byte_0_16: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 3, 3, 3, 9 -; CHECK-BE-NEXT: vinsertb 2, 3, 0 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_byte_0_16: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: vsldoi 3, 3, 3, 9 @@ -686,15 +597,6 @@ } define <16 x i8> @shuffle_vector_byte_1_25(<16 x i8> %a, <16 x i8> %b) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 3, 3, 3, 15 -; CHECK-NEXT: vinsertb 2, 3, 14 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_byte_1_25: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 3, 3, 3, 2 -; CHECK-BE-NEXT: vinsertb 2, 3, 1 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_byte_1_25: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: vsldoi 3, 3, 3, 2 @@ -712,15 +614,6 @@ } define <16 x i8> @shuffle_vector_byte_2_18(<16 x i8> %a, <16 x i8> %b) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 3, 3, 3, 6 -; CHECK-NEXT: vinsertb 2, 3, 13 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_byte_2_18: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 3, 3, 3, 11 -; CHECK-BE-NEXT: vinsertb 2, 3, 2 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_byte_2_18: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: vsldoi 3, 3, 3, 11 @@ -738,15 +631,6 @@ } define <16 x i8> @shuffle_vector_byte_3_27(<16 x i8> %a, <16 x i8> %b) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 3, 3, 3, 13 -; CHECK-NEXT: vinsertb 2, 3, 12 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_byte_3_27: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 3, 3, 3, 4 -; CHECK-BE-NEXT: vinsertb 2, 3, 3 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_byte_3_27: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: vsldoi 3, 3, 3, 4 @@ -764,15 +648,6 @@ } define <16 x i8> @shuffle_vector_byte_4_20(<16 x i8> %a, <16 x i8> %b) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 3, 3, 3, 4 -; CHECK-NEXT: vinsertb 2, 3, 11 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_byte_4_20: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 3, 3, 3, 13 -; CHECK-BE-NEXT: vinsertb 2, 3, 4 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_byte_4_20: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: vsldoi 3, 3, 3, 13 @@ -790,15 +665,6 @@ } define <16 x i8> @shuffle_vector_byte_5_29(<16 x i8> %a, <16 x i8> %b) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 3, 3, 3, 11 -; CHECK-NEXT: vinsertb 2, 3, 10 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_byte_5_29: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 3, 3, 3, 6 -; CHECK-BE-NEXT: vinsertb 2, 3, 5 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_byte_5_29: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: vsldoi 3, 3, 3, 6 @@ -816,15 +682,6 @@ } define <16 x i8> @shuffle_vector_byte_6_22(<16 x i8> %a, <16 x i8> %b) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 3, 3, 3, 2 -; CHECK-NEXT: vinsertb 2, 3, 9 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_byte_6_22: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 3, 3, 3, 15 -; CHECK-BE-NEXT: vinsertb 2, 3, 6 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_byte_6_22: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: vsldoi 3, 3, 3, 15 @@ -842,15 +699,6 @@ } define <16 x i8> @shuffle_vector_byte_7_31(<16 x i8> %a, <16 x i8> %b) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 3, 3, 3, 9 -; CHECK-NEXT: vinsertb 2, 3, 8 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_byte_7_31: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 3, 3, 3, 8 -; CHECK-BE-NEXT: vinsertb 2, 3, 7 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_byte_7_31: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: vsldoi 3, 3, 3, 8 @@ -868,14 +716,6 @@ } define <16 x i8> @shuffle_vector_byte_8_24(<16 x i8> %a, <16 x i8> %b) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vinsertb 2, 3, 7 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_byte_8_24: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 3, 3, 3, 1 -; CHECK-BE-NEXT: vinsertb 2, 3, 8 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_byte_8_24: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: vsldoi 3, 3, 3, 1 @@ -893,15 +733,6 @@ } define <16 x i8> @shuffle_vector_byte_9_17(<16 x i8> %a, <16 x i8> %b) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 3, 3, 3, 7 -; CHECK-NEXT: vinsertb 2, 3, 6 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_byte_9_17: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 3, 3, 3, 10 -; CHECK-BE-NEXT: vinsertb 2, 3, 9 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_byte_9_17: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: vsldoi 3, 3, 3, 10 @@ -919,15 +750,6 @@ } define <16 x i8> @shuffle_vector_byte_10_26(<16 x i8> %a, <16 x i8> %b) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 3, 3, 3, 14 -; CHECK-NEXT: vinsertb 2, 3, 5 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_byte_10_26: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 3, 3, 3, 3 -; CHECK-BE-NEXT: vinsertb 2, 3, 10 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_byte_10_26: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: vsldoi 3, 3, 3, 3 @@ -945,15 +767,6 @@ } define <16 x i8> @shuffle_vector_byte_11_19(<16 x i8> %a, <16 x i8> %b) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 3, 3, 3, 5 -; CHECK-NEXT: vinsertb 2, 3, 4 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_byte_11_19: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 3, 3, 3, 12 -; CHECK-BE-NEXT: vinsertb 2, 3, 11 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_byte_11_19: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: vsldoi 3, 3, 3, 12 @@ -971,15 +784,6 @@ } define <16 x i8> @shuffle_vector_byte_12_28(<16 x i8> %a, <16 x i8> %b) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 3, 3, 3, 12 -; CHECK-NEXT: vinsertb 2, 3, 3 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_byte_12_28: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 3, 3, 3, 5 -; CHECK-BE-NEXT: vinsertb 2, 3, 12 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_byte_12_28: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: vsldoi 3, 3, 3, 5 @@ -997,15 +801,6 @@ } define <16 x i8> @shuffle_vector_byte_13_21(<16 x i8> %a, <16 x i8> %b) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 3, 3, 3, 3 -; CHECK-NEXT: vinsertb 2, 3, 2 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_byte_13_21: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 3, 3, 3, 14 -; CHECK-BE-NEXT: vinsertb 2, 3, 13 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_byte_13_21: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: vsldoi 3, 3, 3, 14 @@ -1023,15 +818,6 @@ } define <16 x i8> @shuffle_vector_byte_14_30(<16 x i8> %a, <16 x i8> %b) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 3, 3, 3, 10 -; CHECK-NEXT: vinsertb 2, 3, 1 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_byte_14_30: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 3, 3, 3, 7 -; CHECK-BE-NEXT: vinsertb 2, 3, 14 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_byte_14_30: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: vsldoi 3, 3, 3, 7 @@ -1049,14 +835,6 @@ } define <16 x i8> @shuffle_vector_byte_15_23(<16 x i8> %a, <16 x i8> %b) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 3, 3, 3, 1 -; CHECK-NEXT: vinsertb 2, 3, 0 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_byte_15_23: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vinsertb 2, 3, 15 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_byte_15_23: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: vinsertb 2, 3, 15 @@ -1665,16 +1443,6 @@ ; shufflevector. If byte element 7 in BE mode(or 8 in LE mode) is the one ; we're attempting to insert, then we can use the vector insert instruction define <16 x i8> @shuffle_vector_byte_0_7(<16 x i8> %a) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addis 3, 2, .LCPI56_0@toc@ha -; CHECK-NEXT: addi 3, 3, .LCPI56_0@toc@l -; CHECK-NEXT: lxvx 35, 0, 3 -; CHECK-NEXT: vperm 2, 2, 2, 3 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_byte_0_7: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vinsertb 2, 2, 0 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_byte_0_7: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: vinsertb 2, 2, 0 @@ -1690,28 +1458,18 @@ } define <16 x i8> @shuffle_vector_byte_1_8(<16 x i8> %a) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vinsertb 2, 2, 14 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_byte_1_8: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: addis 3, 2, .LCPI57_0@toc@ha -; CHECK-BE-NEXT: addi 3, 3, .LCPI57_0@toc@l -; CHECK-BE-NEXT: lxvx 35, 0, 3 -; CHECK-BE-NEXT: vperm 2, 2, 2, 3 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_byte_1_8: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: ld 3, L..C4(2) -; CHECK-64-NEXT: lxv 35, 0(3) -; CHECK-64-NEXT: vperm 2, 2, 2, 3 +; CHECK-64-NEXT: ld 3, L..C4(2) # %const.0 +; CHECK-64-NEXT: lxv 0, 0(3) +; CHECK-64-NEXT: xxperm 34, 34, 0 ; CHECK-64-NEXT: blr ; ; CHECK-32-LABEL: shuffle_vector_byte_1_8: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: lwz 3, L..C4(2) -; CHECK-32-NEXT: lxv 35, 0(3) -; CHECK-32-NEXT: vperm 2, 2, 2, 3 +; CHECK-32-NEXT: lwz 3, L..C4(2) # %const.0 +; CHECK-32-NEXT: lxv 0, 0(3) +; CHECK-32-NEXT: xxperm 34, 34, 0 ; CHECK-32-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> @@ -1719,28 +1477,18 @@ } define <16 x i8> @shuffle_vector_byte_2_8(<16 x i8> %a) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vinsertb 2, 2, 13 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_byte_2_8: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: addis 3, 2, .LCPI58_0@toc@ha -; CHECK-BE-NEXT: addi 3, 3, .LCPI58_0@toc@l -; CHECK-BE-NEXT: lxvx 35, 0, 3 -; CHECK-BE-NEXT: vperm 2, 2, 2, 3 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_byte_2_8: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: ld 3, L..C5(2) -; CHECK-64-NEXT: lxv 35, 0(3) -; CHECK-64-NEXT: vperm 2, 2, 2, 3 +; CHECK-64-NEXT: ld 3, L..C5(2) # %const.0 +; CHECK-64-NEXT: lxv 0, 0(3) +; CHECK-64-NEXT: xxperm 34, 34, 0 ; CHECK-64-NEXT: blr ; ; CHECK-32-LABEL: shuffle_vector_byte_2_8: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: lwz 3, L..C5(2) -; CHECK-32-NEXT: lxv 35, 0(3) -; CHECK-32-NEXT: vperm 2, 2, 2, 3 +; CHECK-32-NEXT: lwz 3, L..C5(2) # %const.0 +; CHECK-32-NEXT: lxv 0, 0(3) +; CHECK-32-NEXT: xxperm 34, 34, 0 ; CHECK-32-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> @@ -1748,16 +1496,6 @@ } define <16 x i8> @shuffle_vector_byte_3_7(<16 x i8> %a) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addis 3, 2, .LCPI59_0@toc@ha -; CHECK-NEXT: addi 3, 3, .LCPI59_0@toc@l -; CHECK-NEXT: lxvx 35, 0, 3 -; CHECK-NEXT: vperm 2, 2, 2, 3 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_byte_3_7: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vinsertb 2, 2, 3 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_byte_3_7: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: vinsertb 2, 2, 3 @@ -1773,16 +1511,6 @@ } define <16 x i8> @shuffle_vector_byte_4_7(<16 x i8> %a) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addis 3, 2, .LCPI60_0@toc@ha -; CHECK-NEXT: addi 3, 3, .LCPI60_0@toc@l -; CHECK-NEXT: lxvx 35, 0, 3 -; CHECK-NEXT: vperm 2, 2, 2, 3 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_byte_4_7: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vinsertb 2, 2, 4 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_byte_4_7: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: vinsertb 2, 2, 4 @@ -1798,28 +1526,18 @@ } define <16 x i8> @shuffle_vector_byte_5_8(<16 x i8> %a) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vinsertb 2, 2, 10 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_byte_5_8: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: addis 3, 2, .LCPI61_0@toc@ha -; CHECK-BE-NEXT: addi 3, 3, .LCPI61_0@toc@l -; CHECK-BE-NEXT: lxvx 35, 0, 3 -; CHECK-BE-NEXT: vperm 2, 2, 2, 3 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_byte_5_8: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: ld 3, L..C6(2) -; CHECK-64-NEXT: lxv 35, 0(3) -; CHECK-64-NEXT: vperm 2, 2, 2, 3 +; CHECK-64-NEXT: ld 3, L..C6(2) # %const.0 +; CHECK-64-NEXT: lxv 0, 0(3) +; CHECK-64-NEXT: xxperm 34, 34, 0 ; CHECK-64-NEXT: blr ; ; CHECK-32-LABEL: shuffle_vector_byte_5_8: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: lwz 3, L..C6(2) -; CHECK-32-NEXT: lxv 35, 0(3) -; CHECK-32-NEXT: vperm 2, 2, 2, 3 +; CHECK-32-NEXT: lwz 3, L..C6(2) # %const.0 +; CHECK-32-NEXT: lxv 0, 0(3) +; CHECK-32-NEXT: xxperm 34, 34, 0 ; CHECK-32-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> @@ -1827,28 +1545,18 @@ } define <16 x i8> @shuffle_vector_byte_6_8(<16 x i8> %a) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vinsertb 2, 2, 9 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_byte_6_8: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: addis 3, 2, .LCPI62_0@toc@ha -; CHECK-BE-NEXT: addi 3, 3, .LCPI62_0@toc@l -; CHECK-BE-NEXT: lxvx 35, 0, 3 -; CHECK-BE-NEXT: vperm 2, 2, 2, 3 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_byte_6_8: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: ld 3, L..C7(2) -; CHECK-64-NEXT: lxv 35, 0(3) -; CHECK-64-NEXT: vperm 2, 2, 2, 3 +; CHECK-64-NEXT: ld 3, L..C7(2) # %const.0 +; CHECK-64-NEXT: lxv 0, 0(3) +; CHECK-64-NEXT: xxperm 34, 34, 0 ; CHECK-64-NEXT: blr ; ; CHECK-32-LABEL: shuffle_vector_byte_6_8: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: lwz 3, L..C7(2) -; CHECK-32-NEXT: lxv 35, 0(3) -; CHECK-32-NEXT: vperm 2, 2, 2, 3 +; CHECK-32-NEXT: lwz 3, L..C7(2) # %const.0 +; CHECK-32-NEXT: lxv 0, 0(3) +; CHECK-32-NEXT: xxperm 34, 34, 0 ; CHECK-32-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> @@ -1856,28 +1564,18 @@ } define <16 x i8> @shuffle_vector_byte_7_8(<16 x i8> %a) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vinsertb 2, 2, 8 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_byte_7_8: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: addis 3, 2, .LCPI63_0@toc@ha -; CHECK-BE-NEXT: addi 3, 3, .LCPI63_0@toc@l -; CHECK-BE-NEXT: lxvx 35, 0, 3 -; CHECK-BE-NEXT: vperm 2, 2, 2, 3 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_byte_7_8: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: ld 3, L..C8(2) -; CHECK-64-NEXT: lxv 35, 0(3) -; CHECK-64-NEXT: vperm 2, 2, 2, 3 +; CHECK-64-NEXT: ld 3, L..C8(2) # %const.0 +; CHECK-64-NEXT: lxv 0, 0(3) +; CHECK-64-NEXT: xxperm 34, 34, 0 ; CHECK-64-NEXT: blr ; ; CHECK-32-LABEL: shuffle_vector_byte_7_8: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: lwz 3, L..C8(2) -; CHECK-32-NEXT: lxv 35, 0(3) -; CHECK-32-NEXT: vperm 2, 2, 2, 3 +; CHECK-32-NEXT: lwz 3, L..C8(2) # %const.0 +; CHECK-32-NEXT: lxv 0, 0(3) +; CHECK-32-NEXT: xxperm 34, 34, 0 ; CHECK-32-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> @@ -1885,16 +1583,6 @@ } define <16 x i8> @shuffle_vector_byte_8_7(<16 x i8> %a) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addis 3, 2, .LCPI64_0@toc@ha -; CHECK-NEXT: addi 3, 3, .LCPI64_0@toc@l -; CHECK-NEXT: lxvx 35, 0, 3 -; CHECK-NEXT: vperm 2, 2, 2, 3 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_byte_8_7: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vinsertb 2, 2, 8 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_byte_8_7: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: vinsertb 2, 2, 8 @@ -1910,16 +1598,6 @@ } define <16 x i8> @shuffle_vector_byte_9_7(<16 x i8> %a) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addis 3, 2, .LCPI65_0@toc@ha -; CHECK-NEXT: addi 3, 3, .LCPI65_0@toc@l -; CHECK-NEXT: lxvx 35, 0, 3 -; CHECK-NEXT: vperm 2, 2, 2, 3 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_byte_9_7: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vinsertb 2, 2, 9 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_byte_9_7: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: vinsertb 2, 2, 9 @@ -1935,16 +1613,6 @@ } define <16 x i8> @shuffle_vector_byte_10_7(<16 x i8> %a) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addis 3, 2, .LCPI66_0@toc@ha -; CHECK-NEXT: addi 3, 3, .LCPI66_0@toc@l -; CHECK-NEXT: lxvx 35, 0, 3 -; CHECK-NEXT: vperm 2, 2, 2, 3 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_byte_10_7: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vinsertb 2, 2, 10 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_byte_10_7: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: vinsertb 2, 2, 10 @@ -1960,28 +1628,18 @@ } define <16 x i8> @shuffle_vector_byte_11_8(<16 x i8> %a) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vinsertb 2, 2, 4 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_byte_11_8: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: addis 3, 2, .LCPI67_0@toc@ha -; CHECK-BE-NEXT: addi 3, 3, .LCPI67_0@toc@l -; CHECK-BE-NEXT: lxvx 35, 0, 3 -; CHECK-BE-NEXT: vperm 2, 2, 2, 3 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_byte_11_8: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: ld 3, L..C9(2) -; CHECK-64-NEXT: lxv 35, 0(3) -; CHECK-64-NEXT: vperm 2, 2, 2, 3 +; CHECK-64-NEXT: ld 3, L..C9(2) # %const.0 +; CHECK-64-NEXT: lxv 0, 0(3) +; CHECK-64-NEXT: xxperm 34, 34, 0 ; CHECK-64-NEXT: blr ; ; CHECK-32-LABEL: shuffle_vector_byte_11_8: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: lwz 3, L..C9(2) -; CHECK-32-NEXT: lxv 35, 0(3) -; CHECK-32-NEXT: vperm 2, 2, 2, 3 +; CHECK-32-NEXT: lwz 3, L..C9(2) # %const.0 +; CHECK-32-NEXT: lxv 0, 0(3) +; CHECK-32-NEXT: xxperm 34, 34, 0 ; CHECK-32-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> @@ -1989,28 +1647,18 @@ } define <16 x i8> @shuffle_vector_byte_12_8(<16 x i8> %a) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vinsertb 2, 2, 3 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_byte_12_8: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: addis 3, 2, .LCPI68_0@toc@ha -; CHECK-BE-NEXT: addi 3, 3, .LCPI68_0@toc@l -; CHECK-BE-NEXT: lxvx 35, 0, 3 -; CHECK-BE-NEXT: vperm 2, 2, 2, 3 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_byte_12_8: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: ld 3, L..C10(2) -; CHECK-64-NEXT: lxv 35, 0(3) -; CHECK-64-NEXT: vperm 2, 2, 2, 3 +; CHECK-64-NEXT: ld 3, L..C10(2) # %const.0 +; CHECK-64-NEXT: lxv 0, 0(3) +; CHECK-64-NEXT: xxperm 34, 34, 0 ; CHECK-64-NEXT: blr ; ; CHECK-32-LABEL: shuffle_vector_byte_12_8: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: lwz 3, L..C10(2) -; CHECK-32-NEXT: lxv 35, 0(3) -; CHECK-32-NEXT: vperm 2, 2, 2, 3 +; CHECK-32-NEXT: lwz 3, L..C10(2) # %const.0 +; CHECK-32-NEXT: lxv 0, 0(3) +; CHECK-32-NEXT: xxperm 34, 34, 0 ; CHECK-32-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> @@ -2018,16 +1666,6 @@ } define <16 x i8> @shuffle_vector_byte_13_7(<16 x i8> %a) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addis 3, 2, .LCPI69_0@toc@ha -; CHECK-NEXT: addi 3, 3, .LCPI69_0@toc@l -; CHECK-NEXT: lxvx 35, 0, 3 -; CHECK-NEXT: vperm 2, 2, 2, 3 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_byte_13_7: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vinsertb 2, 2, 13 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_byte_13_7: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: vinsertb 2, 2, 13 @@ -2043,16 +1681,6 @@ } define <16 x i8> @shuffle_vector_byte_14_7(<16 x i8> %a) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addis 3, 2, .LCPI70_0@toc@ha -; CHECK-NEXT: addi 3, 3, .LCPI70_0@toc@l -; CHECK-NEXT: lxvx 35, 0, 3 -; CHECK-NEXT: vperm 2, 2, 2, 3 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_byte_14_7: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vinsertb 2, 2, 14 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_byte_14_7: ; CHECK-64: # %bb.0: # %entry ; CHECK-64-NEXT: vinsertb 2, 2, 14 @@ -2068,28 +1696,18 @@ } define <16 x i8> @shuffle_vector_byte_15_8(<16 x i8> %a) { -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vinsertb 2, 2, 0 -; CHECK-NEXT: blr -; CHECK-BE-LABEL: shuffle_vector_byte_15_8: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: addis 3, 2, .LCPI71_0@toc@ha -; CHECK-BE-NEXT: addi 3, 3, .LCPI71_0@toc@l -; CHECK-BE-NEXT: lxvx 35, 0, 3 -; CHECK-BE-NEXT: vperm 2, 2, 2, 3 -; CHECK-BE-NEXT: blr ; CHECK-64-LABEL: shuffle_vector_byte_15_8: ; CHECK-64: # %bb.0: # %entry -; CHECK-64-NEXT: ld 3, L..C11(2) -; CHECK-64-NEXT: lxv 35, 0(3) -; CHECK-64-NEXT: vperm 2, 2, 2, 3 +; CHECK-64-NEXT: ld 3, L..C11(2) # %const.0 +; CHECK-64-NEXT: lxv 0, 0(3) +; CHECK-64-NEXT: xxperm 34, 34, 0 ; CHECK-64-NEXT: blr ; ; CHECK-32-LABEL: shuffle_vector_byte_15_8: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: lwz 3, L..C11(2) -; CHECK-32-NEXT: lxv 35, 0(3) -; CHECK-32-NEXT: vperm 2, 2, 2, 3 +; CHECK-32-NEXT: lwz 3, L..C11(2) # %const.0 +; CHECK-32-NEXT: lxv 0, 0(3) +; CHECK-32-NEXT: xxperm 34, 34, 0 ; CHECK-32-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> Index: llvm/test/CodeGen/PowerPC/build-vector-tests.ll =================================================================== --- llvm/test/CodeGen/PowerPC/build-vector-tests.ll +++ llvm/test/CodeGen/PowerPC/build-vector-tests.ll @@ -27,7 +27,6 @@ ; - Splat of a value in memory ; - Inserting element into existing vector ; - Inserting element from existing vector into existing vector -; ; With conversions (float <-> int) ; - Splat of a constant ; - From different values already in registers @@ -38,7 +37,6 @@ ; - Inserting element into existing vector ; - Inserting element from existing vector into existing vector ;*/ -; ;/*=================================== int ===================================*/ ;// P8: xxlxor // ;// P9: xxlxor // @@ -927,8 +925,8 @@ ; P9BE-NEXT: lxv v2, 0(r3) ; P9BE-NEXT: addis r3, r2, .LCPI7_0@toc@ha ; P9BE-NEXT: addi r3, r3, .LCPI7_0@toc@l -; P9BE-NEXT: lxv v3, 0(r3) -; P9BE-NEXT: vperm v2, v2, v2, v3 +; P9BE-NEXT: lxv vs0, 0(r3) +; P9BE-NEXT: xxperm v2, v2, vs0 ; P9BE-NEXT: blr ; ; P9LE-LABEL: fromDiffMemConsDi: @@ -1026,8 +1024,8 @@ ; P9BE-NEXT: lxvx v2, r3, r4 ; P9BE-NEXT: addis r3, r2, .LCPI9_0@toc@ha ; P9BE-NEXT: addi r3, r3, .LCPI9_0@toc@l -; P9BE-NEXT: lxv v3, 0(r3) -; P9BE-NEXT: vperm v2, v2, v2, v3 +; P9BE-NEXT: lxv vs0, 0(r3) +; P9BE-NEXT: xxperm v2, v2, vs0 ; P9BE-NEXT: blr ; ; P9LE-LABEL: fromDiffMemVarDi: @@ -1038,8 +1036,8 @@ ; P9LE-NEXT: lxvx v2, r3, r4 ; P9LE-NEXT: addis r3, r2, .LCPI9_0@toc@ha ; P9LE-NEXT: addi r3, r3, .LCPI9_0@toc@l -; P9LE-NEXT: lxv v3, 0(r3) -; P9LE-NEXT: vperm v2, v2, v2, v3 +; P9LE-NEXT: lxv vs0, 0(r3) +; P9LE-NEXT: xxperm v2, v2, vs0 ; P9LE-NEXT: blr ; ; P8BE-LABEL: fromDiffMemVarDi: @@ -1445,22 +1443,22 @@ define <4 x i32> @fromDiffMemConsDConvftoi(float* nocapture readonly %ptr) { ; P9BE-LABEL: fromDiffMemConsDConvftoi: ; P9BE: # %bb.0: # %entry -; P9BE-NEXT: lxv v2, 0(r3) +; P9BE-NEXT: lxv vs0, 0(r3) ; P9BE-NEXT: addis r3, r2, .LCPI18_0@toc@ha ; P9BE-NEXT: addi r3, r3, .LCPI18_0@toc@l -; P9BE-NEXT: lxv v3, 0(r3) -; P9BE-NEXT: vperm v2, v2, v2, v3 -; P9BE-NEXT: xvcvspsxws v2, v2 +; P9BE-NEXT: lxv vs1, 0(r3) +; P9BE-NEXT: xxperm vs0, vs0, vs1 +; P9BE-NEXT: xvcvspsxws v2, vs0 ; P9BE-NEXT: blr ; ; P9LE-LABEL: fromDiffMemConsDConvftoi: ; P9LE: # %bb.0: # %entry -; P9LE-NEXT: lxv v2, 0(r3) +; P9LE-NEXT: lxv vs0, 0(r3) ; P9LE-NEXT: addis r3, r2, .LCPI18_0@toc@ha ; P9LE-NEXT: addi r3, r3, .LCPI18_0@toc@l -; P9LE-NEXT: lxv v3, 0(r3) -; P9LE-NEXT: vperm v2, v2, v2, v3 -; P9LE-NEXT: xvcvspsxws v2, v2 +; P9LE-NEXT: lxv vs1, 0(r3) +; P9LE-NEXT: xxperm vs0, vs0, vs1 +; P9LE-NEXT: xvcvspsxws v2, vs0 ; P9LE-NEXT: blr ; ; P8BE-LABEL: fromDiffMemConsDConvftoi: @@ -2446,8 +2444,8 @@ ; P9BE-NEXT: lxv v2, 0(r3) ; P9BE-NEXT: addis r3, r2, .LCPI39_0@toc@ha ; P9BE-NEXT: addi r3, r3, .LCPI39_0@toc@l -; P9BE-NEXT: lxv v3, 0(r3) -; P9BE-NEXT: vperm v2, v2, v2, v3 +; P9BE-NEXT: lxv vs0, 0(r3) +; P9BE-NEXT: xxperm v2, v2, vs0 ; P9BE-NEXT: blr ; ; P9LE-LABEL: fromDiffMemConsDui: @@ -2545,8 +2543,8 @@ ; P9BE-NEXT: lxvx v2, r3, r4 ; P9BE-NEXT: addis r3, r2, .LCPI41_0@toc@ha ; P9BE-NEXT: addi r3, r3, .LCPI41_0@toc@l -; P9BE-NEXT: lxv v3, 0(r3) -; P9BE-NEXT: vperm v2, v2, v2, v3 +; P9BE-NEXT: lxv vs0, 0(r3) +; P9BE-NEXT: xxperm v2, v2, vs0 ; P9BE-NEXT: blr ; ; P9LE-LABEL: fromDiffMemVarDui: @@ -2557,8 +2555,8 @@ ; P9LE-NEXT: lxvx v2, r3, r4 ; P9LE-NEXT: addis r3, r2, .LCPI41_0@toc@ha ; P9LE-NEXT: addi r3, r3, .LCPI41_0@toc@l -; P9LE-NEXT: lxv v3, 0(r3) -; P9LE-NEXT: vperm v2, v2, v2, v3 +; P9LE-NEXT: lxv vs0, 0(r3) +; P9LE-NEXT: xxperm v2, v2, vs0 ; P9LE-NEXT: blr ; ; P8BE-LABEL: fromDiffMemVarDui: @@ -2964,22 +2962,22 @@ define <4 x i32> @fromDiffMemConsDConvftoui(float* nocapture readonly %ptr) { ; P9BE-LABEL: fromDiffMemConsDConvftoui: ; P9BE: # %bb.0: # %entry -; P9BE-NEXT: lxv v2, 0(r3) +; P9BE-NEXT: lxv vs0, 0(r3) ; P9BE-NEXT: addis r3, r2, .LCPI50_0@toc@ha ; P9BE-NEXT: addi r3, r3, .LCPI50_0@toc@l -; P9BE-NEXT: lxv v3, 0(r3) -; P9BE-NEXT: vperm v2, v2, v2, v3 -; P9BE-NEXT: xvcvspuxws v2, v2 +; P9BE-NEXT: lxv vs1, 0(r3) +; P9BE-NEXT: xxperm vs0, vs0, vs1 +; P9BE-NEXT: xvcvspuxws v2, vs0 ; P9BE-NEXT: blr ; ; P9LE-LABEL: fromDiffMemConsDConvftoui: ; P9LE: # %bb.0: # %entry -; P9LE-NEXT: lxv v2, 0(r3) +; P9LE-NEXT: lxv vs0, 0(r3) ; P9LE-NEXT: addis r3, r2, .LCPI50_0@toc@ha ; P9LE-NEXT: addi r3, r3, .LCPI50_0@toc@l -; P9LE-NEXT: lxv v3, 0(r3) -; P9LE-NEXT: vperm v2, v2, v2, v3 -; P9LE-NEXT: xvcvspuxws v2, v2 +; P9LE-NEXT: lxv vs1, 0(r3) +; P9LE-NEXT: xxperm vs0, vs0, vs1 +; P9LE-NEXT: xvcvspuxws v2, vs0 ; P9LE-NEXT: blr ; ; P8BE-LABEL: fromDiffMemConsDConvftoui: Index: llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll =================================================================== --- llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll +++ llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll @@ -419,10 +419,10 @@ ; CHECK-P9-BE: # %bb.0: # %entry ; CHECK-P9-BE-NEXT: lxsd v2, 0(r3) ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI12_0@toc@ha -; CHECK-P9-BE-NEXT: xxlxor v4, v4, v4 +; CHECK-P9-BE-NEXT: xxlxor vs0, vs0, vs0 ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI12_0@toc@l -; CHECK-P9-BE-NEXT: lxv v3, 0(r3) -; CHECK-P9-BE-NEXT: vperm v2, v4, v2, v3 +; CHECK-P9-BE-NEXT: lxv vs1, 0(r3) +; CHECK-P9-BE-NEXT: xxperm v2, vs0, vs1 ; CHECK-P9-BE-NEXT: blr ; ; CHECK-NOVSX-LABEL: testmrglb3: @@ -604,11 +604,12 @@ ; CHECK-P9-BE: # %bb.0: # %entry ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI15_0@toc@ha ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI15_0@toc@l -; CHECK-P9-BE-NEXT: lxv v3, 0(r3) +; CHECK-P9-BE-NEXT: lxv vs1, 0(r3) ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI15_1@toc@ha ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI15_1@toc@l -; CHECK-P9-BE-NEXT: lxv v4, 0(r3) -; CHECK-P9-BE-NEXT: vperm v2, v2, v4, v3 +; CHECK-P9-BE-NEXT: lxv vs0, 0(r3) +; CHECK-P9-BE-NEXT: xxperm vs0, v2, vs1 +; CHECK-P9-BE-NEXT: xxlor v2, vs0, vs0 ; CHECK-P9-BE-NEXT: blr ; ; CHECK-NOVSX-LABEL: replace_undefs_in_splat: Index: llvm/test/CodeGen/PowerPC/extract-and-store.ll =================================================================== --- llvm/test/CodeGen/PowerPC/extract-and-store.ll +++ llvm/test/CodeGen/PowerPC/extract-and-store.ll @@ -597,15 +597,16 @@ ; CHECK-P9-LABEL: test_stores_exceed_vec_size: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: addis r3, r2, .LCPI16_0@toc@ha -; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 1 +; CHECK-P9-NEXT: xxlor vs1, vs34, vs34 ; CHECK-P9-NEXT: addi r3, r3, .LCPI16_0@toc@l -; CHECK-P9-NEXT: lxv vs35, 0(r3) +; CHECK-P9-NEXT: lxv vs0, 0(r3) ; CHECK-P9-NEXT: li r3, 16 +; CHECK-P9-NEXT: xxperm vs1, vs34, vs0 +; CHECK-P9-NEXT: xxsldwi vs0, vs34, vs34, 1 ; CHECK-P9-NEXT: stfiwx f0, r5, r3 ; CHECK-P9-NEXT: li r3, 20 +; CHECK-P9-NEXT: stxv vs1, 0(r5) ; CHECK-P9-NEXT: stxsiwx vs34, r5, r3 -; CHECK-P9-NEXT: vperm v3, v2, v2, v3 -; CHECK-P9-NEXT: stxv vs35, 0(r5) ; CHECK-P9-NEXT: blr ; ; CHECK-P9-BE-LABEL: test_stores_exceed_vec_size: Index: llvm/test/CodeGen/PowerPC/hoist-cp-loads.ll =================================================================== --- llvm/test/CodeGen/PowerPC/hoist-cp-loads.ll +++ llvm/test/CodeGen/PowerPC/hoist-cp-loads.ll @@ -18,18 +18,18 @@ ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: lwz r3, 0(r3) ; CHECK-NEXT: lwz r6, 0(r6) -; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r26, -48(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r27, -40(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r28, -40(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r29, -32(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r26, -56(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r27, -48(r1) # 8-byte Folded Spill ; CHECK-NEXT: xxsplti32dx vs8, 0, 1065353216 ; CHECK-NEXT: lwa r4, 0(r4) ; CHECK-NEXT: lfs f5, -4(r8) ; CHECK-NEXT: lfs f4, 8(r8) -; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r30, -24(r1) # 8-byte Folded Spill ; CHECK-NEXT: addi r0, r5, 4 -; CHECK-NEXT: std r24, -64(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r25, -56(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r24, -72(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r25, -64(r1) # 8-byte Folded Spill ; CHECK-NEXT: lfs f3, 12(r8) ; CHECK-NEXT: lfs f2, 16(r8) ; CHECK-NEXT: lfs f1, 20(r8) @@ -38,18 +38,19 @@ ; CHECK-NEXT: xxsplti32dx vs8, 1, -1082130432 ; CHECK-NEXT: xxlxor f6, f6, f6 ; CHECK-NEXT: xxlxor vs7, vs7, vs7 -; CHECK-NEXT: std r18, -112(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r19, -104(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r18, -120(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r19, -112(r1) # 8-byte Folded Spill ; CHECK-NEXT: lfs f0, 24(r8) ; CHECK-NEXT: plxvp vsp34, .LCPI0_0@PCREL(0), 1 -; CHECK-NEXT: std r20, -96(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r21, -88(r1) # 8-byte Folded Spill -; CHECK-NEXT: lfs f10, 0(0) -; CHECK-NEXT: plxv v4, .LCPI0_1@PCREL(0), 1 -; CHECK-NEXT: std r22, -80(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r23, -72(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r20, -104(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r21, -96(r1) # 8-byte Folded Spill +; CHECK-NEXT: lfs f11, 0(0) +; CHECK-NEXT: plxv vs9, .LCPI0_1@PCREL(0), 1 +; CHECK-NEXT: std r22, -88(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r23, -80(r1) # 8-byte Folded Spill ; CHECK-NEXT: cmpwi r3, 0 ; CHECK-NEXT: slwi r9, r6, 1 +; CHECK-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill ; CHECK-NEXT: iselgt r3, 0, r3 ; CHECK-NEXT: extswsli r29, r9, 3 ; CHECK-NEXT: extsw r26, r9 @@ -87,47 +88,47 @@ ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_1: # %bb132 ; CHECK-NEXT: # -; CHECK-NEXT: xvaddsp v5, vs11, vs7 -; CHECK-NEXT: xvaddsp v0, vs11, vs7 -; CHECK-NEXT: xvaddsp v7, vs11, vs7 -; CHECK-NEXT: xvaddsp v8, vs11, vs7 +; CHECK-NEXT: xvaddsp v4, vs12, vs7 +; CHECK-NEXT: xvaddsp v5, vs12, vs7 +; CHECK-NEXT: xvaddsp v6, vs12, vs7 +; CHECK-NEXT: xvaddsp v7, vs12, vs7 ; CHECK-NEXT: addi r25, r25, 4 ; CHECK-NEXT: addi r24, r24, 1 ; CHECK-NEXT: add r0, r0, r6 ; CHECK-NEXT: add r30, r30, r6 -; CHECK-NEXT: xvaddsp v1, vs11, vs7 -; CHECK-NEXT: xvaddsp v6, vs11, vs7 -; CHECK-NEXT: xvaddsp v9, vs10, vs7 -; CHECK-NEXT: xsaddsp f10, f9, f6 +; CHECK-NEXT: xvaddsp v0, vs12, vs7 +; CHECK-NEXT: xvaddsp v1, vs12, vs7 +; CHECK-NEXT: xvaddsp v8, vs11, vs7 +; CHECK-NEXT: xsaddsp f11, f10, f6 ; CHECK-NEXT: add r29, r29, r6 ; CHECK-NEXT: cmpd r25, r4 ; CHECK-NEXT: xsaddsp f4, f4, f6 ; CHECK-NEXT: xsaddsp f3, f3, f6 ; CHECK-NEXT: xsaddsp f2, f2, f6 ; CHECK-NEXT: xsaddsp f0, f0, f6 -; CHECK-NEXT: vmrglw v10, v0, v5 -; CHECK-NEXT: vmrghw v5, v0, v5 -; CHECK-NEXT: vmrglw v0, v8, v7 -; CHECK-NEXT: vmrghw v7, v8, v7 -; CHECK-NEXT: vmrglw v8, v6, v1 -; CHECK-NEXT: vmrghw v1, v6, v1 -; CHECK-NEXT: vmrghw v6, v9, v9 -; CHECK-NEXT: xvaddsp v5, v10, v5 -; CHECK-NEXT: xvaddsp v0, v0, v7 -; CHECK-NEXT: xvaddsp v1, v8, v1 -; CHECK-NEXT: xvaddsp v6, v6, vs7 -; CHECK-NEXT: vmrglw v7, v0, v5 -; CHECK-NEXT: vmrghw v5, v0, v5 -; CHECK-NEXT: vmrglw v0, v6, v1 -; CHECK-NEXT: vmrghw v1, v6, v1 -; CHECK-NEXT: xvmaddasp v7, v5, vs8 -; CHECK-NEXT: xvmaddasp v0, v1, vs8 -; CHECK-NEXT: xxswapd vs9, v7 -; CHECK-NEXT: xscvspdpn f9, vs9 -; CHECK-NEXT: xsaddsp f5, f5, f9 -; CHECK-NEXT: xxsldwi vs9, v0, v0, 1 -; CHECK-NEXT: xscvspdpn f9, vs9 -; CHECK-NEXT: xsaddsp f1, f1, f9 +; CHECK-NEXT: vmrglw v9, v5, v4 +; CHECK-NEXT: vmrghw v4, v5, v4 +; CHECK-NEXT: vmrglw v5, v7, v6 +; CHECK-NEXT: vmrghw v6, v7, v6 +; CHECK-NEXT: vmrglw v7, v1, v0 +; CHECK-NEXT: vmrghw v0, v1, v0 +; CHECK-NEXT: vmrghw v1, v8, v8 +; CHECK-NEXT: xvaddsp v4, v9, v4 +; CHECK-NEXT: xvaddsp v5, v5, v6 +; CHECK-NEXT: xvaddsp v0, v7, v0 +; CHECK-NEXT: xvaddsp v1, v1, vs7 +; CHECK-NEXT: vmrglw v6, v5, v4 +; CHECK-NEXT: vmrghw v4, v5, v4 +; CHECK-NEXT: vmrglw v5, v1, v0 +; CHECK-NEXT: vmrghw v0, v1, v0 +; CHECK-NEXT: xvmaddasp v6, v4, vs8 +; CHECK-NEXT: xvmaddasp v5, v0, vs8 +; CHECK-NEXT: xxswapd vs10, v6 +; CHECK-NEXT: xscvspdpn f10, vs10 +; CHECK-NEXT: xsaddsp f5, f5, f10 +; CHECK-NEXT: xxsldwi vs10, v5, v5, 1 +; CHECK-NEXT: xscvspdpn f10, vs10 +; CHECK-NEXT: xsaddsp f1, f1, f10 ; CHECK-NEXT: bgt- cr0, .LBB0_6 ; CHECK-NEXT: .LBB0_2: # %bb25 ; CHECK-NEXT: # =>This Loop Header: Depth=1 @@ -136,7 +137,7 @@ ; CHECK-NEXT: add r22, r0, r26 ; CHECK-NEXT: li r21, 4 ; CHECK-NEXT: mr r20, r30 -; CHECK-NEXT: xxlxor f9, f9, f9 +; CHECK-NEXT: xxlxor f10, f10, f10 ; CHECK-NEXT: mr r19, r7 ; CHECK-NEXT: li r18, 0 ; CHECK-NEXT: mtctr r28 @@ -147,63 +148,63 @@ ; CHECK-NEXT: .LBB0_3: # %bb175 ; CHECK-NEXT: # Parent Loop BB0_2 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 -; CHECK-NEXT: lfsu f11, 8(r23) -; CHECK-NEXT: lfsu f12, 8(r18) -; CHECK-NEXT: lfsu f13, 8(r19) +; CHECK-NEXT: lfsu f12, 8(r23) +; CHECK-NEXT: lfsu f13, 8(r18) +; CHECK-NEXT: lfsu f31, 8(r19) ; CHECK-NEXT: xsaddsp f4, f4, f6 -; CHECK-NEXT: lxssp v0, 0(r20) -; CHECK-NEXT: xsmaddasp f9, f13, f11 -; CHECK-NEXT: xsmaddasp f5, f12, f11 -; CHECK-NEXT: lfsx f11, r27, r21 -; CHECK-NEXT: xsnegdp v5, f12 +; CHECK-NEXT: lxssp v5, 0(r20) +; CHECK-NEXT: xsmaddasp f10, f31, f12 +; CHECK-NEXT: xsmaddasp f5, f13, f12 +; CHECK-NEXT: lfsx f12, r27, r21 +; CHECK-NEXT: xsnegdp v4, f13 ; CHECK-NEXT: addi r21, r21, 8 -; CHECK-NEXT: xsmaddasp f9, v5, f10 -; CHECK-NEXT: xsmaddasp f5, f13, f10 -; CHECK-NEXT: xsmaddasp f4, f13, f11 -; CHECK-NEXT: lfs f11, -4(r20) +; CHECK-NEXT: xsmaddasp f10, v4, f11 +; CHECK-NEXT: xsmaddasp f5, f31, f11 +; CHECK-NEXT: xsmaddasp f4, f31, f12 +; CHECK-NEXT: lfs f12, -4(r20) ; CHECK-NEXT: addi r20, r20, 8 -; CHECK-NEXT: xsmaddasp f3, f13, f11 -; CHECK-NEXT: xsmaddasp f2, f12, f11 -; CHECK-NEXT: lfs f11, -4(r22) -; CHECK-NEXT: xsmaddasp f3, v5, v0 -; CHECK-NEXT: xsmaddasp f2, f13, v0 -; CHECK-NEXT: lxssp v0, 0(r22) +; CHECK-NEXT: xsmaddasp f3, f31, f12 +; CHECK-NEXT: xsmaddasp f2, f13, f12 +; CHECK-NEXT: lfs f12, -4(r22) +; CHECK-NEXT: xsmaddasp f3, v4, v5 +; CHECK-NEXT: xsmaddasp f2, f31, v5 +; CHECK-NEXT: lxssp v5, 0(r22) ; CHECK-NEXT: addi r22, r22, 8 -; CHECK-NEXT: xsmaddasp f1, f13, f11 -; CHECK-NEXT: xsmaddasp f0, f12, f11 -; CHECK-NEXT: xsmaddasp f1, v5, v0 -; CHECK-NEXT: xsmaddasp f0, f13, v0 +; CHECK-NEXT: xsmaddasp f1, f31, f12 +; CHECK-NEXT: xsmaddasp f0, f13, f12 +; CHECK-NEXT: xsmaddasp f1, v4, v5 +; CHECK-NEXT: xsmaddasp f0, f31, v5 ; CHECK-NEXT: bdnz .LBB0_3 ; CHECK-NEXT: # %bb.4: # %bb59 ; CHECK-NEXT: # -; CHECK-NEXT: xxlxor vs10, vs10, vs10 ; CHECK-NEXT: xxlxor vs11, vs11, vs11 +; CHECK-NEXT: xxlxor vs12, vs12, vs12 ; CHECK-NEXT: bc 12, 4*cr5+lt, .LBB0_1 ; CHECK-NEXT: # %bb.5: # %bb62 ; CHECK-NEXT: # -; CHECK-NEXT: lxvp vsp32, 0(r10) -; CHECK-NEXT: xxlxor vs10, vs10, vs10 -; CHECK-NEXT: xxspltiw vs11, 2143289344 -; CHECK-NEXT: vperm v5, v1, v1, v4 -; CHECK-NEXT: lxvp vsp32, 0(r3) -; CHECK-NEXT: xvmaddasp vs10, v3, v5 -; CHECK-NEXT: vperm v5, v1, v1, v4 -; CHECK-NEXT: lxvp vsp32, 0(r11) -; CHECK-NEXT: xvmaddasp vs10, v5, vs7 -; CHECK-NEXT: vperm v5, v1, v1, v4 -; CHECK-NEXT: lxvp vsp32, 0(r12) -; CHECK-NEXT: xvmaddasp vs10, v5, vs7 -; CHECK-NEXT: vperm v5, v1, v1, v4 +; CHECK-NEXT: lxvp vsp36, 0(r10) +; CHECK-NEXT: xxlxor vs11, vs11, vs11 +; CHECK-NEXT: xxspltiw vs12, 2143289344 +; CHECK-NEXT: xxperm v5, v5, vs9 ; CHECK-NEXT: lxvp vsp32, 0(r3) -; CHECK-NEXT: lxvp vsp38, 0(r29) -; CHECK-NEXT: xvmaddasp vs10, vs7, vs7 -; CHECK-NEXT: xvmaddasp vs10, v5, vs7 -; CHECK-NEXT: vperm v5, v1, v1, v4 -; CHECK-NEXT: lxvp vsp32, 0(r2) -; CHECK-NEXT: xvmaddasp vs10, v7, v5 -; CHECK-NEXT: vperm v5, v1, v1, v4 -; CHECK-NEXT: xvmaddasp vs10, vs7, vs7 -; CHECK-NEXT: xvmaddasp vs10, v5, vs7 +; CHECK-NEXT: xvmaddasp vs11, v3, v5 +; CHECK-NEXT: xxperm v1, v1, vs9 +; CHECK-NEXT: lxvp vsp36, 0(r11) +; CHECK-NEXT: xvmaddasp vs11, v1, vs7 +; CHECK-NEXT: xxperm v5, v5, vs9 +; CHECK-NEXT: xvmaddasp vs11, v5, vs7 +; CHECK-NEXT: lxvp vsp36, 0(r12) +; CHECK-NEXT: xvmaddasp vs11, vs7, vs7 +; CHECK-NEXT: xxperm v5, v5, vs9 +; CHECK-NEXT: xvmaddasp vs11, v5, vs7 +; CHECK-NEXT: lxvp vsp36, 0(r3) +; CHECK-NEXT: lxvp vsp32, 0(r29) +; CHECK-NEXT: xxperm v5, v5, vs9 +; CHECK-NEXT: xvmaddasp vs11, v1, v5 +; CHECK-NEXT: lxvp vsp36, 0(r2) +; CHECK-NEXT: xvmaddasp vs11, vs7, vs7 +; CHECK-NEXT: xxperm v5, v5, vs9 +; CHECK-NEXT: xvmaddasp vs11, v5, vs7 ; CHECK-NEXT: b .LBB0_1 ; CHECK-NEXT: .LBB0_6: # %bb239 ; CHECK-NEXT: stfs f5, -4(r8) @@ -212,24 +213,24 @@ ; CHECK-NEXT: stfs f2, 16(r8) ; CHECK-NEXT: stfs f1, 20(r8) ; CHECK-NEXT: stfs f0, 24(r8) -; CHECK-NEXT: stfs f10, 0(0) +; CHECK-NEXT: stfs f11, 0(0) ; ; NOHOIST-LABEL: test: ; NOHOIST: # %bb.0: # %bb ; NOHOIST-NEXT: lwz r3, 0(r3) ; NOHOIST-NEXT: lwz r6, 0(r6) -; NOHOIST-NEXT: std r28, -32(r1) # 8-byte Folded Spill -; NOHOIST-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; NOHOIST-NEXT: std r26, -48(r1) # 8-byte Folded Spill -; NOHOIST-NEXT: std r27, -40(r1) # 8-byte Folded Spill +; NOHOIST-NEXT: std r28, -40(r1) # 8-byte Folded Spill +; NOHOIST-NEXT: std r29, -32(r1) # 8-byte Folded Spill +; NOHOIST-NEXT: std r26, -56(r1) # 8-byte Folded Spill +; NOHOIST-NEXT: std r27, -48(r1) # 8-byte Folded Spill ; NOHOIST-NEXT: xxsplti32dx vs8, 0, 1065353216 ; NOHOIST-NEXT: lwa r4, 0(r4) ; NOHOIST-NEXT: lfs f5, -4(r8) ; NOHOIST-NEXT: lfs f4, 8(r8) -; NOHOIST-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; NOHOIST-NEXT: std r30, -24(r1) # 8-byte Folded Spill ; NOHOIST-NEXT: addi r0, r5, 4 -; NOHOIST-NEXT: std r24, -64(r1) # 8-byte Folded Spill -; NOHOIST-NEXT: std r25, -56(r1) # 8-byte Folded Spill +; NOHOIST-NEXT: std r24, -72(r1) # 8-byte Folded Spill +; NOHOIST-NEXT: std r25, -64(r1) # 8-byte Folded Spill ; NOHOIST-NEXT: lfs f3, 12(r8) ; NOHOIST-NEXT: lfs f2, 16(r8) ; NOHOIST-NEXT: lfs f1, 20(r8) @@ -238,17 +239,18 @@ ; NOHOIST-NEXT: xxsplti32dx vs8, 1, -1082130432 ; NOHOIST-NEXT: xxlxor f6, f6, f6 ; NOHOIST-NEXT: xxlxor vs7, vs7, vs7 -; NOHOIST-NEXT: std r18, -112(r1) # 8-byte Folded Spill -; NOHOIST-NEXT: std r19, -104(r1) # 8-byte Folded Spill +; NOHOIST-NEXT: std r18, -120(r1) # 8-byte Folded Spill +; NOHOIST-NEXT: std r19, -112(r1) # 8-byte Folded Spill ; NOHOIST-NEXT: lfs f0, 24(r8) -; NOHOIST-NEXT: plxv v2, .LCPI0_1@PCREL(0), 1 -; NOHOIST-NEXT: std r20, -96(r1) # 8-byte Folded Spill -; NOHOIST-NEXT: std r21, -88(r1) # 8-byte Folded Spill -; NOHOIST-NEXT: lfs f10, 0(0) +; NOHOIST-NEXT: plxv vs9, .LCPI0_1@PCREL(0), 1 +; NOHOIST-NEXT: std r20, -104(r1) # 8-byte Folded Spill +; NOHOIST-NEXT: std r21, -96(r1) # 8-byte Folded Spill +; NOHOIST-NEXT: lfs f11, 0(0) ; NOHOIST-NEXT: cmpwi r3, 0 ; NOHOIST-NEXT: slwi r9, r6, 1 -; NOHOIST-NEXT: std r22, -80(r1) # 8-byte Folded Spill -; NOHOIST-NEXT: std r23, -72(r1) # 8-byte Folded Spill +; NOHOIST-NEXT: std r22, -88(r1) # 8-byte Folded Spill +; NOHOIST-NEXT: std r23, -80(r1) # 8-byte Folded Spill +; NOHOIST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill ; NOHOIST-NEXT: iselgt r3, 0, r3 ; NOHOIST-NEXT: extswsli r29, r9, 3 ; NOHOIST-NEXT: extsw r26, r9 @@ -286,47 +288,47 @@ ; NOHOIST-NEXT: .p2align 4 ; NOHOIST-NEXT: .LBB0_1: # %bb132 ; NOHOIST-NEXT: # -; NOHOIST-NEXT: xvaddsp v3, vs11, vs7 -; NOHOIST-NEXT: xvaddsp v4, vs11, vs7 -; NOHOIST-NEXT: xvaddsp v1, vs11, vs7 -; NOHOIST-NEXT: xvaddsp v6, vs11, vs7 +; NOHOIST-NEXT: xvaddsp v2, vs12, vs7 +; NOHOIST-NEXT: xvaddsp v3, vs12, vs7 +; NOHOIST-NEXT: xvaddsp v0, vs12, vs7 +; NOHOIST-NEXT: xvaddsp v1, vs12, vs7 ; NOHOIST-NEXT: addi r25, r25, 4 ; NOHOIST-NEXT: addi r24, r24, 1 ; NOHOIST-NEXT: add r0, r0, r6 ; NOHOIST-NEXT: add r30, r30, r6 -; NOHOIST-NEXT: xvaddsp v5, vs11, vs7 -; NOHOIST-NEXT: xvaddsp v0, vs11, vs7 -; NOHOIST-NEXT: xvaddsp v7, vs10, vs7 -; NOHOIST-NEXT: xsaddsp f10, f9, f6 +; NOHOIST-NEXT: xvaddsp v4, vs12, vs7 +; NOHOIST-NEXT: xvaddsp v5, vs12, vs7 +; NOHOIST-NEXT: xvaddsp v6, vs11, vs7 +; NOHOIST-NEXT: xsaddsp f11, f10, f6 ; NOHOIST-NEXT: add r29, r29, r6 ; NOHOIST-NEXT: cmpd r25, r4 ; NOHOIST-NEXT: xsaddsp f4, f4, f6 ; NOHOIST-NEXT: xsaddsp f3, f3, f6 ; NOHOIST-NEXT: xsaddsp f2, f2, f6 ; NOHOIST-NEXT: xsaddsp f0, f0, f6 -; NOHOIST-NEXT: vmrglw v8, v4, v3 -; NOHOIST-NEXT: vmrghw v3, v4, v3 -; NOHOIST-NEXT: vmrglw v4, v6, v1 -; NOHOIST-NEXT: vmrghw v1, v6, v1 -; NOHOIST-NEXT: vmrglw v6, v0, v5 -; NOHOIST-NEXT: vmrghw v5, v0, v5 -; NOHOIST-NEXT: vmrghw v0, v7, v7 -; NOHOIST-NEXT: xvaddsp v3, v8, v3 -; NOHOIST-NEXT: xvaddsp v4, v4, v1 -; NOHOIST-NEXT: xvaddsp v5, v6, v5 -; NOHOIST-NEXT: xvaddsp v0, v0, vs7 -; NOHOIST-NEXT: vmrglw v1, v4, v3 -; NOHOIST-NEXT: vmrghw v3, v4, v3 -; NOHOIST-NEXT: vmrglw v4, v0, v5 -; NOHOIST-NEXT: vmrghw v5, v0, v5 -; NOHOIST-NEXT: xvmaddasp v1, v3, vs8 -; NOHOIST-NEXT: xvmaddasp v4, v5, vs8 -; NOHOIST-NEXT: xxswapd vs9, v1 -; NOHOIST-NEXT: xscvspdpn f9, vs9 -; NOHOIST-NEXT: xsaddsp f5, f5, f9 -; NOHOIST-NEXT: xxsldwi vs9, v4, v4, 1 -; NOHOIST-NEXT: xscvspdpn f9, vs9 -; NOHOIST-NEXT: xsaddsp f1, f1, f9 +; NOHOIST-NEXT: vmrglw v7, v3, v2 +; NOHOIST-NEXT: vmrghw v2, v3, v2 +; NOHOIST-NEXT: vmrglw v3, v1, v0 +; NOHOIST-NEXT: vmrghw v0, v1, v0 +; NOHOIST-NEXT: vmrglw v1, v5, v4 +; NOHOIST-NEXT: vmrghw v4, v5, v4 +; NOHOIST-NEXT: vmrghw v5, v6, v6 +; NOHOIST-NEXT: xvaddsp v2, v7, v2 +; NOHOIST-NEXT: xvaddsp v3, v3, v0 +; NOHOIST-NEXT: xvaddsp v4, v1, v4 +; NOHOIST-NEXT: xvaddsp v5, v5, vs7 +; NOHOIST-NEXT: vmrglw v0, v3, v2 +; NOHOIST-NEXT: vmrghw v2, v3, v2 +; NOHOIST-NEXT: vmrglw v3, v5, v4 +; NOHOIST-NEXT: vmrghw v4, v5, v4 +; NOHOIST-NEXT: xvmaddasp v0, v2, vs8 +; NOHOIST-NEXT: xvmaddasp v3, v4, vs8 +; NOHOIST-NEXT: xxswapd vs10, v0 +; NOHOIST-NEXT: xscvspdpn f10, vs10 +; NOHOIST-NEXT: xsaddsp f5, f5, f10 +; NOHOIST-NEXT: xxsldwi vs10, v3, v3, 1 +; NOHOIST-NEXT: xscvspdpn f10, vs10 +; NOHOIST-NEXT: xsaddsp f1, f1, f10 ; NOHOIST-NEXT: bgt- cr0, .LBB0_6 ; NOHOIST-NEXT: .LBB0_2: # %bb25 ; NOHOIST-NEXT: # =>This Loop Header: Depth=1 @@ -335,7 +337,7 @@ ; NOHOIST-NEXT: add r22, r0, r26 ; NOHOIST-NEXT: li r21, 4 ; NOHOIST-NEXT: mr r20, r30 -; NOHOIST-NEXT: xxlxor f9, f9, f9 +; NOHOIST-NEXT: xxlxor f10, f10, f10 ; NOHOIST-NEXT: mr r19, r7 ; NOHOIST-NEXT: li r18, 0 ; NOHOIST-NEXT: mtctr r28 @@ -346,64 +348,64 @@ ; NOHOIST-NEXT: .LBB0_3: # %bb175 ; NOHOIST-NEXT: # Parent Loop BB0_2 Depth=1 ; NOHOIST-NEXT: # => This Inner Loop Header: Depth=2 -; NOHOIST-NEXT: lfsu f11, 8(r23) -; NOHOIST-NEXT: lfsu f12, 8(r18) -; NOHOIST-NEXT: lfsu f13, 8(r19) +; NOHOIST-NEXT: lfsu f12, 8(r23) +; NOHOIST-NEXT: lfsu f13, 8(r18) +; NOHOIST-NEXT: lfsu f31, 8(r19) ; NOHOIST-NEXT: xsaddsp f4, f4, f6 -; NOHOIST-NEXT: lxssp v4, 0(r20) -; NOHOIST-NEXT: xsmaddasp f9, f13, f11 -; NOHOIST-NEXT: xsmaddasp f5, f12, f11 -; NOHOIST-NEXT: lfsx f11, r27, r21 -; NOHOIST-NEXT: xsnegdp v3, f12 +; NOHOIST-NEXT: lxssp v3, 0(r20) +; NOHOIST-NEXT: xsmaddasp f10, f31, f12 +; NOHOIST-NEXT: xsmaddasp f5, f13, f12 +; NOHOIST-NEXT: lfsx f12, r27, r21 +; NOHOIST-NEXT: xsnegdp v2, f13 ; NOHOIST-NEXT: addi r21, r21, 8 -; NOHOIST-NEXT: xsmaddasp f9, v3, f10 -; NOHOIST-NEXT: xsmaddasp f5, f13, f10 -; NOHOIST-NEXT: xsmaddasp f4, f13, f11 -; NOHOIST-NEXT: lfs f11, -4(r20) +; NOHOIST-NEXT: xsmaddasp f10, v2, f11 +; NOHOIST-NEXT: xsmaddasp f5, f31, f11 +; NOHOIST-NEXT: xsmaddasp f4, f31, f12 +; NOHOIST-NEXT: lfs f12, -4(r20) ; NOHOIST-NEXT: addi r20, r20, 8 -; NOHOIST-NEXT: xsmaddasp f3, f13, f11 -; NOHOIST-NEXT: xsmaddasp f2, f12, f11 -; NOHOIST-NEXT: lfs f11, -4(r22) -; NOHOIST-NEXT: xsmaddasp f3, v3, v4 -; NOHOIST-NEXT: xsmaddasp f2, f13, v4 -; NOHOIST-NEXT: lxssp v4, 0(r22) +; NOHOIST-NEXT: xsmaddasp f3, f31, f12 +; NOHOIST-NEXT: xsmaddasp f2, f13, f12 +; NOHOIST-NEXT: lfs f12, -4(r22) +; NOHOIST-NEXT: xsmaddasp f3, v2, v3 +; NOHOIST-NEXT: xsmaddasp f2, f31, v3 +; NOHOIST-NEXT: lxssp v3, 0(r22) ; NOHOIST-NEXT: addi r22, r22, 8 -; NOHOIST-NEXT: xsmaddasp f1, f13, f11 -; NOHOIST-NEXT: xsmaddasp f0, f12, f11 -; NOHOIST-NEXT: xsmaddasp f1, v3, v4 -; NOHOIST-NEXT: xsmaddasp f0, f13, v4 +; NOHOIST-NEXT: xsmaddasp f1, f31, f12 +; NOHOIST-NEXT: xsmaddasp f0, f13, f12 +; NOHOIST-NEXT: xsmaddasp f1, v2, v3 +; NOHOIST-NEXT: xsmaddasp f0, f31, v3 ; NOHOIST-NEXT: bdnz .LBB0_3 ; NOHOIST-NEXT: # %bb.4: # %bb59 ; NOHOIST-NEXT: # -; NOHOIST-NEXT: xxlxor vs10, vs10, vs10 ; NOHOIST-NEXT: xxlxor vs11, vs11, vs11 +; NOHOIST-NEXT: xxlxor vs12, vs12, vs12 ; NOHOIST-NEXT: bc 12, 4*cr5+lt, .LBB0_1 ; NOHOIST-NEXT: # %bb.5: # %bb62 ; NOHOIST-NEXT: # -; NOHOIST-NEXT: lxvp vsp36, 0(r10) -; NOHOIST-NEXT: plxvp vsp32, .LCPI0_0@PCREL(0), 1 -; NOHOIST-NEXT: xxlxor vs10, vs10, vs10 -; NOHOIST-NEXT: xxspltiw vs11, 2143289344 -; NOHOIST-NEXT: vperm v3, v5, v5, v2 -; NOHOIST-NEXT: lxvp vsp36, 0(r3) -; NOHOIST-NEXT: xvmaddasp vs10, v1, v3 -; NOHOIST-NEXT: vperm v3, v5, v5, v2 -; NOHOIST-NEXT: lxvp vsp36, 0(r11) -; NOHOIST-NEXT: xvmaddasp vs10, v3, vs7 -; NOHOIST-NEXT: vperm v3, v5, v5, v2 -; NOHOIST-NEXT: lxvp vsp36, 0(r12) -; NOHOIST-NEXT: xvmaddasp vs10, v3, vs7 -; NOHOIST-NEXT: vperm v3, v5, v5, v2 -; NOHOIST-NEXT: lxvp vsp36, 0(r3) -; NOHOIST-NEXT: lxvp vsp32, 0(r29) -; NOHOIST-NEXT: xvmaddasp vs10, vs7, vs7 -; NOHOIST-NEXT: xvmaddasp vs10, v3, vs7 -; NOHOIST-NEXT: vperm v3, v5, v5, v2 -; NOHOIST-NEXT: lxvp vsp36, 0(r2) -; NOHOIST-NEXT: xvmaddasp vs10, v1, v3 -; NOHOIST-NEXT: vperm v3, v5, v5, v2 -; NOHOIST-NEXT: xvmaddasp vs10, vs7, vs7 -; NOHOIST-NEXT: xvmaddasp vs10, v3, vs7 +; NOHOIST-NEXT: lxvp vsp34, 0(r10) +; NOHOIST-NEXT: plxvp vsp36, .LCPI0_0@PCREL(0), 1 +; NOHOIST-NEXT: xxlxor vs11, vs11, vs11 +; NOHOIST-NEXT: xxspltiw vs12, 2143289344 +; NOHOIST-NEXT: xxperm v3, v3, vs9 +; NOHOIST-NEXT: lxvp vsp32, 0(r3) +; NOHOIST-NEXT: xvmaddasp vs11, v5, v3 +; NOHOIST-NEXT: xxperm v1, v1, vs9 +; NOHOIST-NEXT: lxvp vsp34, 0(r11) +; NOHOIST-NEXT: xvmaddasp vs11, v1, vs7 +; NOHOIST-NEXT: xxperm v3, v3, vs9 +; NOHOIST-NEXT: xvmaddasp vs11, v3, vs7 +; NOHOIST-NEXT: xvmaddasp vs11, vs7, vs7 +; NOHOIST-NEXT: lxvp vsp34, 0(r12) +; NOHOIST-NEXT: xxperm v3, v3, vs9 +; NOHOIST-NEXT: xvmaddasp vs11, v3, vs7 +; NOHOIST-NEXT: lxvp vsp34, 0(r3) +; NOHOIST-NEXT: lxvp vsp36, 0(r29) +; NOHOIST-NEXT: xxperm v3, v3, vs9 +; NOHOIST-NEXT: xvmaddasp vs11, v5, v3 +; NOHOIST-NEXT: lxvp vsp34, 0(r2) +; NOHOIST-NEXT: xvmaddasp vs11, vs7, vs7 +; NOHOIST-NEXT: xxperm v3, v3, vs9 +; NOHOIST-NEXT: xvmaddasp vs11, v3, vs7 ; NOHOIST-NEXT: b .LBB0_1 ; NOHOIST-NEXT: .LBB0_6: # %bb239 ; NOHOIST-NEXT: stfs f5, -4(r8) @@ -412,7 +414,7 @@ ; NOHOIST-NEXT: stfs f2, 16(r8) ; NOHOIST-NEXT: stfs f1, 20(r8) ; NOHOIST-NEXT: stfs f0, 24(r8) -; NOHOIST-NEXT: stfs f10, 0(0) +; NOHOIST-NEXT: stfs f11, 0(0) bb: %i = load i32, i32* %arg3, align 4 %i6 = shl i32 %i, 1 Index: llvm/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll =================================================================== --- llvm/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll +++ llvm/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll @@ -96,8 +96,8 @@ ; CHECK-P9-BE-NEXT: lxv v2, 0(r3) ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI2_0@toc@l -; CHECK-P9-BE-NEXT: lxv v3, 0(r3) -; CHECK-P9-BE-NEXT: vperm v2, v2, v2, v3 +; CHECK-P9-BE-NEXT: lxv vs0, 0(r3) +; CHECK-P9-BE-NEXT: xxperm v2, v2, vs0 ; CHECK-P9-BE-NEXT: blr %v1 = load <4 x i32>, <4 x i32>* %vp1 %v2 = load <4 x i32>, <4 x i32>* %vp2 @@ -134,8 +134,8 @@ ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha ; CHECK-P9-BE-NEXT: lxv v2, 0(r4) ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI3_0@toc@l -; CHECK-P9-BE-NEXT: lxv v3, 0(r3) -; CHECK-P9-BE-NEXT: vperm v2, v2, v2, v3 +; CHECK-P9-BE-NEXT: lxv vs0, 0(r3) +; CHECK-P9-BE-NEXT: xxperm v2, v2, vs0 ; CHECK-P9-BE-NEXT: blr %v1 = load <4 x i32>, <4 x i32>* %vp1 %v2 = load <4 x i32>, <4 x i32>* %vp2 @@ -172,8 +172,8 @@ ; CHECK-P9-BE-NEXT: lxv v2, 0(r3) ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI4_0@toc@l -; CHECK-P9-BE-NEXT: lxv v3, 0(r3) -; CHECK-P9-BE-NEXT: vperm v2, v2, v2, v3 +; CHECK-P9-BE-NEXT: lxv vs0, 0(r3) +; CHECK-P9-BE-NEXT: xxperm v2, v2, vs0 ; CHECK-P9-BE-NEXT: blr %v1 = load <8 x i16>, <8 x i16>* %vp1 %v2 = load <8 x i16>, <8 x i16>* %vp2 @@ -210,8 +210,8 @@ ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI5_0@toc@ha ; CHECK-P9-BE-NEXT: lxv v2, 0(r4) ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI5_0@toc@l -; CHECK-P9-BE-NEXT: lxv v3, 0(r3) -; CHECK-P9-BE-NEXT: vperm v2, v2, v2, v3 +; CHECK-P9-BE-NEXT: lxv vs0, 0(r3) +; CHECK-P9-BE-NEXT: xxperm v2, v2, vs0 ; CHECK-P9-BE-NEXT: blr %v1 = load <8 x i16>, <8 x i16>* %vp1 %v2 = load <8 x i16>, <8 x i16>* %vp2 @@ -346,8 +346,8 @@ ; CHECK-P9-BE-NEXT: lxv v2, 0(r3) ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI9_0@toc@ha ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI9_0@toc@l -; CHECK-P9-BE-NEXT: lxv v3, 0(r3) -; CHECK-P9-BE-NEXT: vperm v2, v2, v2, v3 +; CHECK-P9-BE-NEXT: lxv vs0, 0(r3) +; CHECK-P9-BE-NEXT: xxperm v2, v2, vs0 ; CHECK-P9-BE-NEXT: blr %v1 = load <4 x float>, <4 x float>* %vp1 %v2 = load <4 x float>, <4 x float>* %vp2 @@ -384,8 +384,8 @@ ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI10_0@toc@ha ; CHECK-P9-BE-NEXT: lxv v2, 0(r4) ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI10_0@toc@l -; CHECK-P9-BE-NEXT: lxv v3, 0(r3) -; CHECK-P9-BE-NEXT: vperm v2, v2, v2, v3 +; CHECK-P9-BE-NEXT: lxv vs0, 0(r3) +; CHECK-P9-BE-NEXT: xxperm v2, v2, vs0 ; CHECK-P9-BE-NEXT: blr %v1 = load <4 x float>, <4 x float>* %vp1 %v2 = load <4 x float>, <4 x float>* %vp2 @@ -475,8 +475,8 @@ ; CHECK-P9-BE: # %bb.0: ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI13_0@toc@ha ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI13_0@toc@l -; CHECK-P9-BE-NEXT: lxv v3, 0(r3) -; CHECK-P9-BE-NEXT: vperm v2, v2, v2, v3 +; CHECK-P9-BE-NEXT: lxv vs0, 0(r3) +; CHECK-P9-BE-NEXT: xxperm v2, v2, vs0 ; CHECK-P9-BE-NEXT: stxv v2, 0(r7) ; CHECK-P9-BE-NEXT: blr %v3 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> @@ -512,9 +512,9 @@ ; CHECK-P9-BE: # %bb.0: ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI14_0@toc@ha ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI14_0@toc@l -; CHECK-P9-BE-NEXT: lxv v2, 0(r3) -; CHECK-P9-BE-NEXT: vperm v2, v3, v3, v2 -; CHECK-P9-BE-NEXT: stxv v2, 0(r7) +; CHECK-P9-BE-NEXT: lxv vs0, 0(r3) +; CHECK-P9-BE-NEXT: xxperm v3, v3, vs0 +; CHECK-P9-BE-NEXT: stxv v3, 0(r7) ; CHECK-P9-BE-NEXT: blr %v3 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> store <4 x i32> %v3, <4 x i32>* %vp @@ -549,8 +549,8 @@ ; CHECK-P9-BE: # %bb.0: ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI15_0@toc@ha ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI15_0@toc@l -; CHECK-P9-BE-NEXT: lxv v3, 0(r3) -; CHECK-P9-BE-NEXT: vperm v2, v2, v2, v3 +; CHECK-P9-BE-NEXT: lxv vs0, 0(r3) +; CHECK-P9-BE-NEXT: xxperm v2, v2, vs0 ; CHECK-P9-BE-NEXT: stxv v2, 0(r7) ; CHECK-P9-BE-NEXT: blr %v3 = shufflevector <8 x i16> %v1, <8 x i16> %v2, <8 x i32> @@ -586,9 +586,9 @@ ; CHECK-P9-BE: # %bb.0: ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI16_0@toc@ha ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI16_0@toc@l -; CHECK-P9-BE-NEXT: lxv v2, 0(r3) -; CHECK-P9-BE-NEXT: vperm v2, v3, v3, v2 -; CHECK-P9-BE-NEXT: stxv v2, 0(r7) +; CHECK-P9-BE-NEXT: lxv vs0, 0(r3) +; CHECK-P9-BE-NEXT: xxperm v3, v3, vs0 +; CHECK-P9-BE-NEXT: stxv v3, 0(r7) ; CHECK-P9-BE-NEXT: blr %v3 = shufflevector <8 x i16> %v1, <8 x i16> %v2, <8 x i32> store <8 x i16> %v3, <8 x i16>* %vp @@ -745,8 +745,8 @@ ; CHECK-P9-BE: # %bb.0: ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI21_0@toc@ha ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI21_0@toc@l -; CHECK-P9-BE-NEXT: lxv v3, 0(r3) -; CHECK-P9-BE-NEXT: vperm v2, v2, v2, v3 +; CHECK-P9-BE-NEXT: lxv vs0, 0(r3) +; CHECK-P9-BE-NEXT: xxperm v2, v2, vs0 ; CHECK-P9-BE-NEXT: stxv v2, 0(r7) ; CHECK-P9-BE-NEXT: blr %v3 = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> @@ -782,9 +782,9 @@ ; CHECK-P9-BE: # %bb.0: ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI22_0@toc@ha ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI22_0@toc@l -; CHECK-P9-BE-NEXT: lxv v2, 0(r3) -; CHECK-P9-BE-NEXT: vperm v2, v3, v3, v2 -; CHECK-P9-BE-NEXT: stxv v2, 0(r7) +; CHECK-P9-BE-NEXT: lxv vs0, 0(r3) +; CHECK-P9-BE-NEXT: xxperm v3, v3, vs0 +; CHECK-P9-BE-NEXT: stxv v3, 0(r7) ; CHECK-P9-BE-NEXT: blr %v3 = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> store <4 x float> %v3, <4 x float>* %vp Index: llvm/test/CodeGen/PowerPC/p10-vector-rotate.ll =================================================================== --- llvm/test/CodeGen/PowerPC/p10-vector-rotate.ll +++ llvm/test/CodeGen/PowerPC/p10-vector-rotate.ll @@ -81,8 +81,8 @@ define <1 x i128> @test_vrlqnm(<1 x i128> %a, <1 x i128> %b, <1 x i128> %c) { ; CHECK-LE-LABEL: test_vrlqnm: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: plxv v5, .LCPI4_0@PCREL(0), 1 -; CHECK-LE-NEXT: vperm v3, v4, v3, v5 +; CHECK-LE-NEXT: plxv vs0, .LCPI4_0@PCREL(0), 1 +; CHECK-LE-NEXT: xxperm v3, v4, vs0 ; CHECK-LE-NEXT: vrlqnm v2, v2, v3 ; CHECK-LE-NEXT: blr ; @@ -90,9 +90,9 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha ; CHECK-BE-NEXT: addi r3, r3, .LCPI4_0@toc@l -; CHECK-BE-NEXT: lxv v5, 0(r3) -; CHECK-BE-NEXT: vperm v3, v3, v4, v5 -; CHECK-BE-NEXT: vrlqnm v2, v2, v3 +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: xxperm v4, v3, vs0 +; CHECK-BE-NEXT: vrlqnm v2, v2, v4 ; CHECK-BE-NEXT: blr entry: %0 = bitcast <1 x i128> %b to <16 x i8> Index: llvm/test/CodeGen/PowerPC/p9-vinsert-vextract.ll =================================================================== --- llvm/test/CodeGen/PowerPC/p9-vinsert-vextract.ll +++ llvm/test/CodeGen/PowerPC/p9-vinsert-vextract.ll @@ -451,8 +451,8 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: addis 3, 2, .LCPI16_0@toc@ha ; CHECK-BE-NEXT: addi 3, 3, .LCPI16_0@toc@l -; CHECK-BE-NEXT: lxv 35, 0(3) -; CHECK-BE-NEXT: vperm 2, 2, 2, 3 +; CHECK-BE-NEXT: lxv 0, 0(3) +; CHECK-BE-NEXT: xxperm 34, 34, 0 ; CHECK-BE-NEXT: blr entry: %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> @@ -464,8 +464,8 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addis 3, 2, .LCPI17_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI17_0@toc@l -; CHECK-NEXT: lxv 35, 0(3) -; CHECK-NEXT: vperm 2, 2, 2, 3 +; CHECK-NEXT: lxv 0, 0(3) +; CHECK-NEXT: xxperm 34, 34, 0 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: shuffle_vector_halfword_1_3: @@ -482,8 +482,8 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addis 3, 2, .LCPI18_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI18_0@toc@l -; CHECK-NEXT: lxv 35, 0(3) -; CHECK-NEXT: vperm 2, 2, 2, 3 +; CHECK-NEXT: lxv 0, 0(3) +; CHECK-NEXT: xxperm 34, 34, 0 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: shuffle_vector_halfword_2_3: @@ -505,8 +505,8 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: addis 3, 2, .LCPI19_0@toc@ha ; CHECK-BE-NEXT: addi 3, 3, .LCPI19_0@toc@l -; CHECK-BE-NEXT: lxv 35, 0(3) -; CHECK-BE-NEXT: vperm 2, 2, 2, 3 +; CHECK-BE-NEXT: lxv 0, 0(3) +; CHECK-BE-NEXT: xxperm 34, 34, 0 ; CHECK-BE-NEXT: blr entry: %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> @@ -518,8 +518,8 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addis 3, 2, .LCPI20_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI20_0@toc@l -; CHECK-NEXT: lxv 35, 0(3) -; CHECK-NEXT: vperm 2, 2, 2, 3 +; CHECK-NEXT: lxv 0, 0(3) +; CHECK-NEXT: xxperm 34, 34, 0 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: shuffle_vector_halfword_4_3: @@ -536,8 +536,8 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addis 3, 2, .LCPI21_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI21_0@toc@l -; CHECK-NEXT: lxv 35, 0(3) -; CHECK-NEXT: vperm 2, 2, 2, 3 +; CHECK-NEXT: lxv 0, 0(3) +; CHECK-NEXT: xxperm 34, 34, 0 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: shuffle_vector_halfword_5_3: @@ -559,8 +559,8 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: addis 3, 2, .LCPI22_0@toc@ha ; CHECK-BE-NEXT: addi 3, 3, .LCPI22_0@toc@l -; CHECK-BE-NEXT: lxv 35, 0(3) -; CHECK-BE-NEXT: vperm 2, 2, 2, 3 +; CHECK-BE-NEXT: lxv 0, 0(3) +; CHECK-BE-NEXT: xxperm 34, 34, 0 ; CHECK-BE-NEXT: blr entry: %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> @@ -577,8 +577,8 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: addis 3, 2, .LCPI23_0@toc@ha ; CHECK-BE-NEXT: addi 3, 3, .LCPI23_0@toc@l -; CHECK-BE-NEXT: lxv 35, 0(3) -; CHECK-BE-NEXT: vperm 2, 2, 2, 3 +; CHECK-BE-NEXT: lxv 0, 0(3) +; CHECK-BE-NEXT: xxperm 34, 34, 0 ; CHECK-BE-NEXT: blr entry: %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> @@ -1455,8 +1455,8 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addis 3, 2, .LCPI56_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI56_0@toc@l -; CHECK-NEXT: lxv 35, 0(3) -; CHECK-NEXT: vperm 2, 2, 2, 3 +; CHECK-NEXT: lxv 0, 0(3) +; CHECK-NEXT: xxperm 34, 34, 0 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: shuffle_vector_byte_0_7: @@ -1478,8 +1478,8 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: addis 3, 2, .LCPI57_0@toc@ha ; CHECK-BE-NEXT: addi 3, 3, .LCPI57_0@toc@l -; CHECK-BE-NEXT: lxv 35, 0(3) -; CHECK-BE-NEXT: vperm 2, 2, 2, 3 +; CHECK-BE-NEXT: lxv 0, 0(3) +; CHECK-BE-NEXT: xxperm 34, 34, 0 ; CHECK-BE-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> @@ -1496,8 +1496,8 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: addis 3, 2, .LCPI58_0@toc@ha ; CHECK-BE-NEXT: addi 3, 3, .LCPI58_0@toc@l -; CHECK-BE-NEXT: lxv 35, 0(3) -; CHECK-BE-NEXT: vperm 2, 2, 2, 3 +; CHECK-BE-NEXT: lxv 0, 0(3) +; CHECK-BE-NEXT: xxperm 34, 34, 0 ; CHECK-BE-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> @@ -1509,8 +1509,8 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addis 3, 2, .LCPI59_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI59_0@toc@l -; CHECK-NEXT: lxv 35, 0(3) -; CHECK-NEXT: vperm 2, 2, 2, 3 +; CHECK-NEXT: lxv 0, 0(3) +; CHECK-NEXT: xxperm 34, 34, 0 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: shuffle_vector_byte_3_7: @@ -1527,8 +1527,8 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addis 3, 2, .LCPI60_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI60_0@toc@l -; CHECK-NEXT: lxv 35, 0(3) -; CHECK-NEXT: vperm 2, 2, 2, 3 +; CHECK-NEXT: lxv 0, 0(3) +; CHECK-NEXT: xxperm 34, 34, 0 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: shuffle_vector_byte_4_7: @@ -1550,8 +1550,8 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: addis 3, 2, .LCPI61_0@toc@ha ; CHECK-BE-NEXT: addi 3, 3, .LCPI61_0@toc@l -; CHECK-BE-NEXT: lxv 35, 0(3) -; CHECK-BE-NEXT: vperm 2, 2, 2, 3 +; CHECK-BE-NEXT: lxv 0, 0(3) +; CHECK-BE-NEXT: xxperm 34, 34, 0 ; CHECK-BE-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> @@ -1568,8 +1568,8 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: addis 3, 2, .LCPI62_0@toc@ha ; CHECK-BE-NEXT: addi 3, 3, .LCPI62_0@toc@l -; CHECK-BE-NEXT: lxv 35, 0(3) -; CHECK-BE-NEXT: vperm 2, 2, 2, 3 +; CHECK-BE-NEXT: lxv 0, 0(3) +; CHECK-BE-NEXT: xxperm 34, 34, 0 ; CHECK-BE-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> @@ -1586,8 +1586,8 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: addis 3, 2, .LCPI63_0@toc@ha ; CHECK-BE-NEXT: addi 3, 3, .LCPI63_0@toc@l -; CHECK-BE-NEXT: lxv 35, 0(3) -; CHECK-BE-NEXT: vperm 2, 2, 2, 3 +; CHECK-BE-NEXT: lxv 0, 0(3) +; CHECK-BE-NEXT: xxperm 34, 34, 0 ; CHECK-BE-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> @@ -1599,8 +1599,8 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addis 3, 2, .LCPI64_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI64_0@toc@l -; CHECK-NEXT: lxv 35, 0(3) -; CHECK-NEXT: vperm 2, 2, 2, 3 +; CHECK-NEXT: lxv 0, 0(3) +; CHECK-NEXT: xxperm 34, 34, 0 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: shuffle_vector_byte_8_7: @@ -1617,8 +1617,8 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addis 3, 2, .LCPI65_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI65_0@toc@l -; CHECK-NEXT: lxv 35, 0(3) -; CHECK-NEXT: vperm 2, 2, 2, 3 +; CHECK-NEXT: lxv 0, 0(3) +; CHECK-NEXT: xxperm 34, 34, 0 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: shuffle_vector_byte_9_7: @@ -1635,8 +1635,8 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addis 3, 2, .LCPI66_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI66_0@toc@l -; CHECK-NEXT: lxv 35, 0(3) -; CHECK-NEXT: vperm 2, 2, 2, 3 +; CHECK-NEXT: lxv 0, 0(3) +; CHECK-NEXT: xxperm 34, 34, 0 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: shuffle_vector_byte_10_7: @@ -1658,8 +1658,8 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: addis 3, 2, .LCPI67_0@toc@ha ; CHECK-BE-NEXT: addi 3, 3, .LCPI67_0@toc@l -; CHECK-BE-NEXT: lxv 35, 0(3) -; CHECK-BE-NEXT: vperm 2, 2, 2, 3 +; CHECK-BE-NEXT: lxv 0, 0(3) +; CHECK-BE-NEXT: xxperm 34, 34, 0 ; CHECK-BE-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> @@ -1676,8 +1676,8 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: addis 3, 2, .LCPI68_0@toc@ha ; CHECK-BE-NEXT: addi 3, 3, .LCPI68_0@toc@l -; CHECK-BE-NEXT: lxv 35, 0(3) -; CHECK-BE-NEXT: vperm 2, 2, 2, 3 +; CHECK-BE-NEXT: lxv 0, 0(3) +; CHECK-BE-NEXT: xxperm 34, 34, 0 ; CHECK-BE-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> @@ -1689,8 +1689,8 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addis 3, 2, .LCPI69_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI69_0@toc@l -; CHECK-NEXT: lxv 35, 0(3) -; CHECK-NEXT: vperm 2, 2, 2, 3 +; CHECK-NEXT: lxv 0, 0(3) +; CHECK-NEXT: xxperm 34, 34, 0 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: shuffle_vector_byte_13_7: @@ -1707,8 +1707,8 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addis 3, 2, .LCPI70_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI70_0@toc@l -; CHECK-NEXT: lxv 35, 0(3) -; CHECK-NEXT: vperm 2, 2, 2, 3 +; CHECK-NEXT: lxv 0, 0(3) +; CHECK-NEXT: xxperm 34, 34, 0 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: shuffle_vector_byte_14_7: @@ -1730,8 +1730,8 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: addis 3, 2, .LCPI71_0@toc@ha ; CHECK-BE-NEXT: addi 3, 3, .LCPI71_0@toc@l -; CHECK-BE-NEXT: lxv 35, 0(3) -; CHECK-BE-NEXT: vperm 2, 2, 2, 3 +; CHECK-BE-NEXT: lxv 0, 0(3) +; CHECK-BE-NEXT: xxperm 34, 34, 0 ; CHECK-BE-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> Index: llvm/test/CodeGen/PowerPC/pre-inc-disable.ll =================================================================== --- llvm/test/CodeGen/PowerPC/pre-inc-disable.ll +++ llvm/test/CodeGen/PowerPC/pre-inc-disable.ll @@ -14,13 +14,13 @@ ; CHECK-LABEL: test64: ; CHECK-NOT: ldux ; CHECK-NOT: mtvsrd -; CHECK: lxsdx [[REG:[0-9]+]] -; CHECK: vperm {{[0-9]+}}, [[REG]] +; CHECK: lxsd [[REG:[0-9]+]] +; CHECK: xxperm {{[0-9]+}}, [[REG]] ; P9BE-LABEL: test64: ; P9BE-NOT: ldux ; P9BE-NOT: mtvsrd ; P9BE: lxsdx [[REG:[0-9]+]] -; P9BE: vperm {{[0-9]+}}, {{[0-9]+}}, [[REG]] +; P9BE: xxperm [[REG]], {{[0-9]+}}, {{[0-9]+}} entry: %idx.ext63 = sext i32 %i_pix2 to i64 %add.ptr64 = getelementptr inbounds i8, i8* %pix2, i64 %idx.ext63 @@ -49,8 +49,8 @@ ; CHECK-LABEL: test32: ; CHECK-NOT: lwzux ; CHECK-NOT: mtvsrwz -; CHECK: lxsiwzx [[REG:[0-9]+]] -; CHECK: vperm {{[0-9]+}}, [[REG]] +; CHECK: lfiwzx [[REG:[0-9]+]] +; CHECK: xxperm {{[0-9]+}}, [[REG]] ; P9BE-LABEL: test32: ; P9BE-NOT: lwzux ; P9BE-NOT: mtvsrwz Index: llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll =================================================================== --- llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll +++ llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll @@ -94,15 +94,15 @@ ; P9BE-NEXT: mulli r4, r4, 95 ; P9BE-NEXT: sub r3, r3, r4 ; P9BE-NEXT: lis r4, -16728 -; P9BE-NEXT: mtvsrwz v4, r3 +; P9BE-NEXT: mtfprwz f0, r3 ; P9BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha ; P9BE-NEXT: ori r4, r4, 63249 ; P9BE-NEXT: addi r3, r3, .LCPI0_0@toc@l -; P9BE-NEXT: lxv v5, 0(r3) +; P9BE-NEXT: lxv vs1, 0(r3) ; P9BE-NEXT: li r3, 6 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: vperm v3, v4, v3, v5 +; P9BE-NEXT: xxperm v3, vs0, vs1 ; P9BE-NEXT: mulhw r4, r3, r4 ; P9BE-NEXT: srwi r5, r4, 31 ; P9BE-NEXT: srawi r4, r4, 8 @@ -121,9 +121,9 @@ ; P9BE-NEXT: add r4, r4, r5 ; P9BE-NEXT: mulli r4, r4, 98 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: mtvsrwz v2, r3 -; P9BE-NEXT: vperm v2, v2, v4, v5 -; P9BE-NEXT: vmrghw v2, v3, v2 +; P9BE-NEXT: mtfprwz f0, r3 +; P9BE-NEXT: xxperm v4, vs0, vs1 +; P9BE-NEXT: vmrghw v2, v3, v4 ; P9BE-NEXT: blr ; ; P8LE-LABEL: fold_srem_vec_1: @@ -320,14 +320,14 @@ ; P9BE-NEXT: add r5, r5, r6 ; P9BE-NEXT: mulli r5, r5, 95 ; P9BE-NEXT: sub r3, r3, r5 -; P9BE-NEXT: mtvsrwz v4, r3 +; P9BE-NEXT: mtfprwz f0, r3 ; P9BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha ; P9BE-NEXT: addi r3, r3, .LCPI1_0@toc@l -; P9BE-NEXT: lxv v5, 0(r3) +; P9BE-NEXT: lxv vs1, 0(r3) ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: vperm v3, v4, v3, v5 +; P9BE-NEXT: xxperm v3, vs0, vs1 ; P9BE-NEXT: mulhw r5, r3, r4 ; P9BE-NEXT: add r5, r5, r3 ; P9BE-NEXT: srwi r6, r5, 31 @@ -346,9 +346,9 @@ ; P9BE-NEXT: add r4, r4, r5 ; P9BE-NEXT: mulli r4, r4, 95 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: mtvsrwz v2, r3 -; P9BE-NEXT: vperm v2, v2, v4, v5 -; P9BE-NEXT: vmrghw v2, v2, v3 +; P9BE-NEXT: mtfprwz f0, r3 +; P9BE-NEXT: xxperm v4, vs0, vs1 +; P9BE-NEXT: vmrghw v2, v4, v3 ; P9BE-NEXT: blr ; ; P8LE-LABEL: fold_srem_vec_2: @@ -547,14 +547,14 @@ ; P9BE-NEXT: add r6, r6, r7 ; P9BE-NEXT: mulli r7, r6, 95 ; P9BE-NEXT: sub r3, r3, r7 -; P9BE-NEXT: mtvsrwz v4, r3 +; P9BE-NEXT: mtfprwz f0, r3 ; P9BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha ; P9BE-NEXT: addi r3, r3, .LCPI2_0@toc@l -; P9BE-NEXT: lxv v5, 0(r3) +; P9BE-NEXT: lxv vs1, 0(r3) ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r7, r3 -; P9BE-NEXT: vperm v3, v4, v3, v5 +; P9BE-NEXT: xxperm v3, vs0, vs1 ; P9BE-NEXT: mulhw r8, r7, r5 ; P9BE-NEXT: add r7, r8, r7 ; P9BE-NEXT: srwi r8, r7, 31 @@ -572,16 +572,16 @@ ; P9BE-NEXT: srawi r5, r5, 6 ; P9BE-NEXT: add r5, r5, r8 ; P9BE-NEXT: mulli r8, r5, 95 -; P9BE-NEXT: mtvsrwz v0, r5 ; P9BE-NEXT: sub r3, r3, r8 -; P9BE-NEXT: mtvsrwz v2, r3 -; P9BE-NEXT: vperm v2, v2, v4, v5 -; P9BE-NEXT: mtvsrwz v4, r6 -; P9BE-NEXT: vmrghw v2, v2, v3 +; P9BE-NEXT: mtfprwz f0, r3 +; P9BE-NEXT: xxperm v4, vs0, vs1 +; P9BE-NEXT: mtfprwz f0, r6 +; P9BE-NEXT: vmrghw v2, v4, v3 ; P9BE-NEXT: mtvsrwz v3, r4 -; P9BE-NEXT: vperm v3, v4, v3, v5 ; P9BE-NEXT: mtvsrwz v4, r7 -; P9BE-NEXT: vperm v4, v0, v4, v5 +; P9BE-NEXT: xxperm v3, vs0, vs1 +; P9BE-NEXT: mtfprwz f0, r5 +; P9BE-NEXT: xxperm v4, vs0, vs1 ; P9BE-NEXT: vmrghw v3, v4, v3 ; P9BE-NEXT: vadduhm v2, v2, v3 ; P9BE-NEXT: blr @@ -777,15 +777,15 @@ ; P9BE-NEXT: slwi r4, r4, 6 ; P9BE-NEXT: sub r3, r3, r4 ; P9BE-NEXT: lis r4, -21386 -; P9BE-NEXT: mtvsrwz v4, r3 +; P9BE-NEXT: mtfprwz f0, r3 ; P9BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha ; P9BE-NEXT: ori r4, r4, 37253 ; P9BE-NEXT: addi r3, r3, .LCPI3_0@toc@l -; P9BE-NEXT: lxv v5, 0(r3) +; P9BE-NEXT: lxv vs1, 0(r3) ; P9BE-NEXT: li r3, 6 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: vperm v3, v4, v3, v5 +; P9BE-NEXT: xxperm v3, vs0, vs1 ; P9BE-NEXT: mulhw r4, r3, r4 ; P9BE-NEXT: add r4, r4, r3 ; P9BE-NEXT: srwi r5, r4, 31 @@ -801,9 +801,9 @@ ; P9BE-NEXT: addze r4, r4 ; P9BE-NEXT: slwi r4, r4, 3 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: mtvsrwz v2, r3 -; P9BE-NEXT: vperm v2, v2, v4, v5 -; P9BE-NEXT: vmrghw v2, v3, v2 +; P9BE-NEXT: mtfprwz f0, r3 +; P9BE-NEXT: xxperm v4, vs0, vs1 +; P9BE-NEXT: vmrghw v2, v3, v4 ; P9BE-NEXT: blr ; ; P8LE-LABEL: dont_fold_srem_power_of_two: @@ -959,7 +959,7 @@ ; P9BE-NEXT: mulli r4, r4, 23 ; P9BE-NEXT: sub r3, r3, r4 ; P9BE-NEXT: lis r4, 24749 -; P9BE-NEXT: mtvsrwz v3, r3 +; P9BE-NEXT: mtfprwz f0, r3 ; P9BE-NEXT: li r3, 6 ; P9BE-NEXT: ori r4, r4, 47143 ; P9BE-NEXT: vextuhlx r3, r3, v2 @@ -971,15 +971,15 @@ ; P9BE-NEXT: mulli r4, r4, 5423 ; P9BE-NEXT: sub r3, r3, r4 ; P9BE-NEXT: lis r4, -14230 -; P9BE-NEXT: mtvsrwz v4, r3 +; P9BE-NEXT: mtvsrwz v3, r3 ; P9BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha ; P9BE-NEXT: ori r4, r4, 30865 ; P9BE-NEXT: addi r3, r3, .LCPI4_0@toc@l -; P9BE-NEXT: lxv v5, 0(r3) +; P9BE-NEXT: lxv vs1, 0(r3) ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: vperm v3, v3, v4, v5 +; P9BE-NEXT: xxperm v3, vs0, vs1 ; P9BE-NEXT: mulhw r4, r3, r4 ; P9BE-NEXT: add r4, r4, r3 ; P9BE-NEXT: srwi r5, r4, 31 @@ -989,8 +989,8 @@ ; P9BE-NEXT: sub r3, r3, r4 ; P9BE-NEXT: mtvsrwz v2, r3 ; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: mtvsrwz v4, r3 -; P9BE-NEXT: vperm v2, v4, v2, v5 +; P9BE-NEXT: mtfprwz f0, r3 +; P9BE-NEXT: xxperm v2, vs0, vs1 ; P9BE-NEXT: vmrghw v2, v2, v3 ; P9BE-NEXT: blr ; @@ -1150,7 +1150,7 @@ ; P9BE-NEXT: mulli r4, r4, 23 ; P9BE-NEXT: sub r3, r3, r4 ; P9BE-NEXT: lis r4, 24749 -; P9BE-NEXT: mtvsrwz v3, r3 +; P9BE-NEXT: mtfprwz f0, r3 ; P9BE-NEXT: li r3, 6 ; P9BE-NEXT: ori r4, r4, 47143 ; P9BE-NEXT: vextuhlx r3, r3, v2 @@ -1161,22 +1161,22 @@ ; P9BE-NEXT: add r4, r4, r5 ; P9BE-NEXT: mulli r4, r4, 5423 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: mtvsrwz v4, r3 +; P9BE-NEXT: mtvsrwz v3, r3 ; P9BE-NEXT: addis r3, r2, .LCPI5_0@toc@ha ; P9BE-NEXT: addi r3, r3, .LCPI5_0@toc@l -; P9BE-NEXT: lxv v5, 0(r3) +; P9BE-NEXT: lxv vs1, 0(r3) ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: vperm v3, v3, v4, v5 +; P9BE-NEXT: xxperm v3, vs0, vs1 ; P9BE-NEXT: srawi r4, r3, 15 ; P9BE-NEXT: addze r4, r4 ; P9BE-NEXT: slwi r4, r4, 15 ; P9BE-NEXT: sub r3, r3, r4 ; P9BE-NEXT: mtvsrwz v2, r3 ; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: mtvsrwz v4, r3 -; P9BE-NEXT: vperm v2, v4, v2, v5 +; P9BE-NEXT: mtfprwz f0, r3 +; P9BE-NEXT: xxperm v2, vs0, vs1 ; P9BE-NEXT: vmrghw v2, v2, v3 ; P9BE-NEXT: blr ; Index: llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll =================================================================== --- llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll +++ llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll @@ -83,15 +83,15 @@ ; P9BE-NEXT: srwi r4, r4, 5 ; P9BE-NEXT: mulli r4, r4, 98 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: mtvsrwz v4, r3 +; P9BE-NEXT: mtfprwz f0, r3 ; P9BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha ; P9BE-NEXT: addi r3, r3, .LCPI0_0@toc@l -; P9BE-NEXT: lxv v5, 0(r3) +; P9BE-NEXT: lxv vs1, 0(r3) ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: clrlwi r4, r3, 16 ; P9BE-NEXT: rlwinm r3, r3, 30, 18, 31 -; P9BE-NEXT: vperm v3, v4, v3, v5 +; P9BE-NEXT: xxperm v3, vs0, vs1 ; P9BE-NEXT: mulhwu r3, r3, r5 ; P9BE-NEXT: srwi r3, r3, 2 ; P9BE-NEXT: mulli r3, r3, 124 @@ -109,9 +109,9 @@ ; P9BE-NEXT: srwi r4, r4, 6 ; P9BE-NEXT: mulli r4, r4, 95 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: mtvsrwz v2, r3 -; P9BE-NEXT: vperm v2, v2, v4, v5 -; P9BE-NEXT: vmrghw v2, v2, v3 +; P9BE-NEXT: mtfprwz f0, r3 +; P9BE-NEXT: xxperm v4, vs0, vs1 +; P9BE-NEXT: vmrghw v2, v4, v3 ; P9BE-NEXT: blr ; ; P8LE-LABEL: fold_urem_vec_1: @@ -296,14 +296,14 @@ ; P9BE-NEXT: srwi r5, r5, 6 ; P9BE-NEXT: mulli r5, r5, 95 ; P9BE-NEXT: sub r3, r3, r5 -; P9BE-NEXT: mtvsrwz v4, r3 +; P9BE-NEXT: mtfprwz f0, r3 ; P9BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha ; P9BE-NEXT: addi r3, r3, .LCPI1_0@toc@l -; P9BE-NEXT: lxv v5, 0(r3) +; P9BE-NEXT: lxv vs1, 0(r3) ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: vperm v3, v4, v3, v5 +; P9BE-NEXT: xxperm v3, vs0, vs1 ; P9BE-NEXT: mulhwu r5, r3, r4 ; P9BE-NEXT: sub r6, r3, r5 ; P9BE-NEXT: srwi r6, r6, 1 @@ -322,9 +322,9 @@ ; P9BE-NEXT: srwi r4, r4, 6 ; P9BE-NEXT: mulli r4, r4, 95 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: mtvsrwz v2, r3 -; P9BE-NEXT: vperm v2, v2, v4, v5 -; P9BE-NEXT: vmrghw v2, v2, v3 +; P9BE-NEXT: mtfprwz f0, r3 +; P9BE-NEXT: xxperm v4, vs0, vs1 +; P9BE-NEXT: vmrghw v2, v4, v3 ; P9BE-NEXT: blr ; ; P8LE-LABEL: fold_urem_vec_2: @@ -523,14 +523,14 @@ ; P9BE-NEXT: srwi r6, r6, 6 ; P9BE-NEXT: mulli r7, r6, 95 ; P9BE-NEXT: sub r3, r3, r7 -; P9BE-NEXT: mtvsrwz v4, r3 +; P9BE-NEXT: mtfprwz f0, r3 ; P9BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha ; P9BE-NEXT: addi r3, r3, .LCPI2_0@toc@l -; P9BE-NEXT: lxv v5, 0(r3) +; P9BE-NEXT: lxv vs1, 0(r3) ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: clrlwi r7, r3, 16 -; P9BE-NEXT: vperm v3, v4, v3, v5 +; P9BE-NEXT: xxperm v3, vs0, vs1 ; P9BE-NEXT: mulhwu r8, r7, r5 ; P9BE-NEXT: sub r7, r7, r8 ; P9BE-NEXT: srwi r7, r7, 1 @@ -548,16 +548,16 @@ ; P9BE-NEXT: add r5, r8, r5 ; P9BE-NEXT: srwi r5, r5, 6 ; P9BE-NEXT: mulli r8, r5, 95 -; P9BE-NEXT: mtvsrwz v0, r5 ; P9BE-NEXT: sub r3, r3, r8 -; P9BE-NEXT: mtvsrwz v2, r3 -; P9BE-NEXT: vperm v2, v2, v4, v5 -; P9BE-NEXT: mtvsrwz v4, r6 -; P9BE-NEXT: vmrghw v2, v2, v3 +; P9BE-NEXT: mtfprwz f0, r3 +; P9BE-NEXT: xxperm v4, vs0, vs1 +; P9BE-NEXT: mtfprwz f0, r6 +; P9BE-NEXT: vmrghw v2, v4, v3 ; P9BE-NEXT: mtvsrwz v3, r4 -; P9BE-NEXT: vperm v3, v4, v3, v5 ; P9BE-NEXT: mtvsrwz v4, r7 -; P9BE-NEXT: vperm v4, v0, v4, v5 +; P9BE-NEXT: xxperm v3, vs0, vs1 +; P9BE-NEXT: mtfprwz f0, r5 +; P9BE-NEXT: xxperm v4, vs0, vs1 ; P9BE-NEXT: vmrghw v3, v4, v3 ; P9BE-NEXT: vadduhm v2, v2, v3 ; P9BE-NEXT: blr @@ -734,14 +734,14 @@ ; P9BE-NEXT: li r3, 0 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: clrlwi r3, r3, 26 -; P9BE-NEXT: mtvsrwz v4, r3 +; P9BE-NEXT: mtfprwz f0, r3 ; P9BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha ; P9BE-NEXT: addi r3, r3, .LCPI3_0@toc@l -; P9BE-NEXT: lxv v5, 0(r3) +; P9BE-NEXT: lxv vs1, 0(r3) ; P9BE-NEXT: li r3, 6 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: vperm v3, v4, v3, v5 +; P9BE-NEXT: xxperm v3, vs0, vs1 ; P9BE-NEXT: mulhwu r4, r3, r4 ; P9BE-NEXT: sub r5, r3, r4 ; P9BE-NEXT: srwi r5, r5, 1 @@ -753,9 +753,9 @@ ; P9BE-NEXT: li r3, 4 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: clrlwi r3, r3, 29 -; P9BE-NEXT: mtvsrwz v2, r3 -; P9BE-NEXT: vperm v2, v2, v4, v5 -; P9BE-NEXT: vmrghw v2, v3, v2 +; P9BE-NEXT: mtfprwz f0, r3 +; P9BE-NEXT: xxperm v4, vs0, vs1 +; P9BE-NEXT: vmrghw v2, v3, v4 ; P9BE-NEXT: blr ; ; P8LE-LABEL: dont_fold_urem_power_of_two: @@ -888,23 +888,23 @@ ; P9BE-NEXT: srwi r4, r4, 4 ; P9BE-NEXT: mulli r4, r4, 23 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: mtvsrwz v4, r3 +; P9BE-NEXT: mtfprwz f0, r3 ; P9BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha ; P9BE-NEXT: addi r3, r3, .LCPI4_0@toc@l -; P9BE-NEXT: lxv v5, 0(r3) +; P9BE-NEXT: lxv vs1, 0(r3) ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: clrlwi r4, r3, 16 ; P9BE-NEXT: rlwinm r3, r3, 31, 17, 31 -; P9BE-NEXT: vperm v3, v4, v3, v5 +; P9BE-NEXT: xxperm v3, vs0, vs1 ; P9BE-NEXT: mulhwu r3, r3, r5 ; P9BE-NEXT: srwi r3, r3, 8 ; P9BE-NEXT: mulli r3, r3, 654 ; P9BE-NEXT: sub r3, r4, r3 ; P9BE-NEXT: mtvsrwz v2, r3 ; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: mtvsrwz v4, r3 -; P9BE-NEXT: vperm v2, v4, v2, v5 +; P9BE-NEXT: mtfprwz f0, r3 +; P9BE-NEXT: xxperm v2, vs0, vs1 ; P9BE-NEXT: vmrghw v2, v2, v3 ; P9BE-NEXT: blr ; Index: llvm/test/CodeGen/PowerPC/vec-itofp.ll =================================================================== --- llvm/test/CodeGen/PowerPC/vec-itofp.ll +++ llvm/test/CodeGen/PowerPC/vec-itofp.ll @@ -193,13 +193,13 @@ ; ; CHECK-P9-LABEL: test2: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv v2, 0(r4) +; CHECK-P9-NEXT: lxv vs1, 0(r4) ; CHECK-P9-NEXT: addis r4, r2, .LCPI2_0@toc@ha -; CHECK-P9-NEXT: xxlxor v4, v4, v4 +; CHECK-P9-NEXT: xxlxor vs0, vs0, vs0 ; CHECK-P9-NEXT: addi r4, r4, .LCPI2_0@toc@l -; CHECK-P9-NEXT: lxv v3, 0(r4) -; CHECK-P9-NEXT: vperm v2, v4, v2, v3 -; CHECK-P9-NEXT: xvcvuxddp vs0, v2 +; CHECK-P9-NEXT: lxv vs2, 0(r4) +; CHECK-P9-NEXT: xxperm vs1, vs0, vs2 +; CHECK-P9-NEXT: xvcvuxddp vs0, vs1 ; CHECK-P9-NEXT: stxv vs0, 0(r3) ; CHECK-P9-NEXT: blr ; @@ -395,19 +395,20 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv v2, 0(r4) ; CHECK-BE-NEXT: addis r4, r2, .LCPI4_0@toc@ha -; CHECK-BE-NEXT: xxlxor v3, v3, v3 +; CHECK-BE-NEXT: xxlxor vs0, vs0, vs0 ; CHECK-BE-NEXT: addi r4, r4, .LCPI4_0@toc@l -; CHECK-BE-NEXT: lxv v4, 0(r4) +; CHECK-BE-NEXT: lxv vs1, 0(r4) ; CHECK-BE-NEXT: addis r4, r2, .LCPI4_1@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI4_1@toc@l -; CHECK-BE-NEXT: vperm v3, v3, v2, v4 +; CHECK-BE-NEXT: vmr v3, v2 +; CHECK-BE-NEXT: xxperm v3, vs0, vs1 +; CHECK-BE-NEXT: lxv vs1, 0(r4) ; CHECK-BE-NEXT: vextsh2d v3, v3 +; CHECK-BE-NEXT: xxperm v2, v2, vs1 ; CHECK-BE-NEXT: xvcvsxddp vs0, v3 -; CHECK-BE-NEXT: lxv v3, 0(r4) -; CHECK-BE-NEXT: vperm v2, v2, v2, v3 -; CHECK-BE-NEXT: stxv vs0, 16(r3) ; CHECK-BE-NEXT: vextsh2d v2, v2 ; CHECK-BE-NEXT: xvcvsxddp vs1, v2 +; CHECK-BE-NEXT: stxv vs0, 16(r3) ; CHECK-BE-NEXT: stxv vs1, 0(r3) ; CHECK-BE-NEXT: blr entry: @@ -441,8 +442,8 @@ ; CHECK-P9-NEXT: lxv v2, 0(r4) ; CHECK-P9-NEXT: addis r4, r2, .LCPI5_0@toc@ha ; CHECK-P9-NEXT: addi r4, r4, .LCPI5_0@toc@l -; CHECK-P9-NEXT: lxv v3, 0(r4) -; CHECK-P9-NEXT: vperm v2, v2, v2, v3 +; CHECK-P9-NEXT: lxv vs0, 0(r4) +; CHECK-P9-NEXT: xxperm v2, v2, vs0 ; CHECK-P9-NEXT: vextsh2d v2, v2 ; CHECK-P9-NEXT: xvcvsxddp vs0, v2 ; CHECK-P9-NEXT: stxv vs0, 0(r3) @@ -453,8 +454,8 @@ ; CHECK-BE-NEXT: lxv v2, 0(r4) ; CHECK-BE-NEXT: addis r4, r2, .LCPI5_0@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI5_0@toc@l -; CHECK-BE-NEXT: lxv v3, 0(r4) -; CHECK-BE-NEXT: vperm v2, v2, v2, v3 +; CHECK-BE-NEXT: lxv vs0, 0(r4) +; CHECK-BE-NEXT: xxperm v2, v2, vs0 ; CHECK-BE-NEXT: vextsh2d v2, v2 ; CHECK-BE-NEXT: xvcvsxddp vs0, v2 ; CHECK-BE-NEXT: stxv vs0, 0(r3) Index: llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll =================================================================== --- llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll +++ llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll @@ -50,19 +50,19 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mtfprd f0, r3 ; CHECK-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; CHECK-BE-NEXT: xscvspdpn f1, vs0 +; CHECK-BE-NEXT: xscvspdpn f2, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 ; CHECK-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-BE-NEXT: lxv v2, 0(r3) +; CHECK-BE-NEXT: lxv vs1, 0(r3) ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: mtfprwz f2, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mtvsrwz v2, r3 ; CHECK-BE-NEXT: li r3, 0 -; CHECK-BE-NEXT: vperm v2, v3, v4, v2 +; CHECK-BE-NEXT: xxperm v2, vs2, vs1 ; CHECK-BE-NEXT: vextuwlx r3, r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -130,31 +130,31 @@ ; ; CHECK-BE-LABEL: test4elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-BE-NEXT: xxsldwi vs1, v2, v2, 3 ; CHECK-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; CHECK-BE-NEXT: xxsldwi vs2, v2, v2, 1 ; CHECK-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: lxv v3, 0(r3) -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: xxswapd vs0, v2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: xscvspdpn f0, v2 -; CHECK-BE-NEXT: mtvsrwz v5, r3 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vperm v4, v5, v4, v3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f0 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: xxswapd vs1, v2 +; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mtfprwz f1, r3 +; CHECK-BE-NEXT: xxperm v3, vs1, vs0 +; CHECK-BE-NEXT: xscvspdpn f1, v2 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mtfprwz f1, r3 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: mtvsrwz v2, r3 -; CHECK-BE-NEXT: vperm v2, v5, v2, v3 -; CHECK-BE-NEXT: vmrghw v2, v2, v4 +; CHECK-BE-NEXT: xxperm v2, vs1, vs0 +; CHECK-BE-NEXT: vmrghw v2, v2, v3 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -273,52 +273,52 @@ ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha ; CHECK-BE-NEXT: addi r3, r3, .LCPI2_0@toc@l -; CHECK-BE-NEXT: lxv v2, 0(r3) -; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: xxswapd vs2, vs1 -; CHECK-BE-NEXT: mtvsrwz v3, r3 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: xscvspdpn f2, vs1 +; CHECK-BE-NEXT: lxv vs2, 0(r3) +; CHECK-BE-NEXT: xxsldwi vs3, vs1, vs1, 3 +; CHECK-BE-NEXT: xscvspdpn f3, vs3 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: mffprwz r3, f3 +; CHECK-BE-NEXT: xxswapd vs3, vs1 +; CHECK-BE-NEXT: mtvsrwz v2, r3 +; CHECK-BE-NEXT: xscvspdpn f3, vs3 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: mffprwz r3, f3 +; CHECK-BE-NEXT: mtfprwz f3, r3 +; CHECK-BE-NEXT: xxperm v2, vs3, vs2 +; CHECK-BE-NEXT: xscvspdpn f3, vs1 ; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vperm v3, v4, v3, v2 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mffprwz r3, f3 +; CHECK-BE-NEXT: mtfprwz f3, r3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: xxperm v3, vs3, vs2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 +; CHECK-BE-NEXT: vmrghw v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mtfprwz f1, r3 +; CHECK-BE-NEXT: xxperm v3, vs1, vs2 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vperm v4, v5, v4, v2 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtfprwz f1, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v0, r3 -; CHECK-BE-NEXT: vperm v2, v5, v0, v2 -; CHECK-BE-NEXT: vmrghw v2, v2, v4 -; CHECK-BE-NEXT: xxmrghd v2, v2, v3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: xxperm v4, vs1, vs2 +; CHECK-BE-NEXT: vmrghw v3, v4, v3 +; CHECK-BE-NEXT: xxmrghd v2, v3, v2 ; CHECK-BE-NEXT: blr entry: %a = load <8 x float>, <8 x float>* %0, align 32 @@ -533,103 +533,103 @@ ; CHECK-BE-LABEL: test16elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs2, 16(r4) -; CHECK-BE-NEXT: addis r5, r2, .LCPI3_0@toc@ha -; CHECK-BE-NEXT: lxv vs1, 0(r4) -; CHECK-BE-NEXT: lxv vs0, 48(r4) -; CHECK-BE-NEXT: addi r5, r5, .LCPI3_0@toc@l -; CHECK-BE-NEXT: lxv v2, 0(r5) +; CHECK-BE-NEXT: lxv vs0, 32(r4) +; CHECK-BE-NEXT: lxv vs1, 48(r4) +; CHECK-BE-NEXT: lxv vs4, 0(r4) +; CHECK-BE-NEXT: addis r4, r2, .LCPI3_0@toc@ha +; CHECK-BE-NEXT: addi r4, r4, .LCPI3_0@toc@l +; CHECK-BE-NEXT: lxv vs5, 0(r4) ; CHECK-BE-NEXT: xxsldwi vs3, vs2, vs2, 3 -; CHECK-BE-NEXT: xxswapd vs4, vs2 -; CHECK-BE-NEXT: xscvspdpn f5, vs2 +; CHECK-BE-NEXT: xxswapd vs6, vs2 +; CHECK-BE-NEXT: xscvspdpn f7, vs2 ; CHECK-BE-NEXT: xxsldwi vs2, vs2, vs2, 1 -; CHECK-BE-NEXT: xxsldwi vs6, vs1, vs1, 3 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: xscvspdpn f4, vs4 +; CHECK-BE-NEXT: xscvspdpn f6, vs6 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: xscvdpsxws f6, f6 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r4, f3 +; CHECK-BE-NEXT: mtvsrwz v2, r4 +; CHECK-BE-NEXT: mffprwz r4, f6 +; CHECK-BE-NEXT: xscvdpsxws f6, f7 +; CHECK-BE-NEXT: mtfprwz f3, r4 +; CHECK-BE-NEXT: xxperm v2, vs3, vs5 +; CHECK-BE-NEXT: xxsldwi vs3, vs4, vs4, 3 +; CHECK-BE-NEXT: mffprwz r4, f6 +; CHECK-BE-NEXT: xscvspdpn f3, vs3 +; CHECK-BE-NEXT: mtfprwz f6, r4 +; CHECK-BE-NEXT: mffprwz r4, f2 +; CHECK-BE-NEXT: xxswapd vs2, vs4 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: mtvsrwz v3, r4 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: xxperm v3, vs6, vs5 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r5, f3 +; CHECK-BE-NEXT: vmrghw v2, v3, v2 +; CHECK-BE-NEXT: mffprwz r4, f3 +; CHECK-BE-NEXT: xxsldwi vs3, vs4, vs4, 1 +; CHECK-BE-NEXT: mtvsrwz v4, r4 +; CHECK-BE-NEXT: xscvspdpn f3, vs3 +; CHECK-BE-NEXT: mffprwz r4, f2 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: mtfprwz f2, r4 +; CHECK-BE-NEXT: xxperm v4, vs2, vs5 +; CHECK-BE-NEXT: xscvspdpn f2, vs4 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r4, f2 +; CHECK-BE-NEXT: mtfprwz f2, r4 +; CHECK-BE-NEXT: mffprwz r4, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs1 -; CHECK-BE-NEXT: mtvsrwz v3, r5 -; CHECK-BE-NEXT: mffprwz r5, f4 -; CHECK-BE-NEXT: xscvdpsxws f4, f5 +; CHECK-BE-NEXT: mtvsrwz v3, r4 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: mtvsrwz v4, r5 -; CHECK-BE-NEXT: vperm v3, v4, v3, v2 +; CHECK-BE-NEXT: xxperm v3, vs2, vs5 +; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mffprwz r5, f4 -; CHECK-BE-NEXT: xscvspdpn f4, vs6 -; CHECK-BE-NEXT: mtvsrwz v4, r5 -; CHECK-BE-NEXT: mffprwz r5, f2 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: vmrghw v3, v3, v4 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r4, f2 +; CHECK-BE-NEXT: mtvsrwz v4, r4 +; CHECK-BE-NEXT: mffprwz r4, f3 +; CHECK-BE-NEXT: xxmrghd vs3, v3, v2 +; CHECK-BE-NEXT: mtfprwz f2, r4 +; CHECK-BE-NEXT: xxperm v4, vs2, vs5 ; CHECK-BE-NEXT: xscvspdpn f2, vs1 ; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: mtvsrwz v5, r5 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mffprwz r4, f2 +; CHECK-BE-NEXT: mtfprwz f2, r4 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 +; CHECK-BE-NEXT: mtvsrwz v2, r4 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: xxperm v2, vs2, vs5 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: mffprwz r5, f4 -; CHECK-BE-NEXT: mtvsrwz v5, r5 -; CHECK-BE-NEXT: mffprwz r5, f3 -; CHECK-BE-NEXT: xxsldwi vs3, vs0, vs0, 3 -; CHECK-BE-NEXT: mtvsrwz v0, r5 -; CHECK-BE-NEXT: mffprwz r5, f2 -; CHECK-BE-NEXT: xscvspdpn f2, vs3 -; CHECK-BE-NEXT: vperm v5, v0, v5, v2 -; CHECK-BE-NEXT: mtvsrwz v0, r5 -; CHECK-BE-NEXT: mffprwz r5, f1 +; CHECK-BE-NEXT: vmrghw v2, v2, v4 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mtvsrwz v1, r5 +; CHECK-BE-NEXT: mtvsrwz v3, r4 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: vperm v0, v0, v1, v2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghw v4, v0, v5 -; CHECK-BE-NEXT: mffprwz r5, f2 -; CHECK-BE-NEXT: xscvspdpn f2, vs0 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: mtfprwz f1, r4 +; CHECK-BE-NEXT: xxperm v3, vs1, vs5 +; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: mtvsrwz v1, r5 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r5, f1 -; CHECK-BE-NEXT: lxv vs1, 32(r4) -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrwz v6, r5 -; CHECK-BE-NEXT: vperm v1, v6, v1, v2 -; CHECK-BE-NEXT: mffprwz r4, f2 -; CHECK-BE-NEXT: xxmrghd vs2, v4, v3 -; CHECK-BE-NEXT: mtvsrwz v5, r4 -; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: xxsldwi vs0, vs1, vs1, 3 -; CHECK-BE-NEXT: stxv vs2, 0(r3) -; CHECK-BE-NEXT: mtvsrwz v3, r4 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: vperm v3, v5, v3, v2 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vmrghw v3, v3, v1 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: mtfprwz f1, r4 ; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: xxswapd vs0, vs1 ; CHECK-BE-NEXT: mtvsrwz v4, r4 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: xscvspdpn f0, vs1 -; CHECK-BE-NEXT: mtvsrwz v5, r4 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vperm v4, v5, v4, v2 -; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: xxsldwi vs0, vs1, vs1, 1 -; CHECK-BE-NEXT: mtvsrwz v5, r4 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: mtvsrwz v0, r4 -; CHECK-BE-NEXT: vperm v2, v5, v0, v2 -; CHECK-BE-NEXT: vmrghw v2, v2, v4 -; CHECK-BE-NEXT: xxmrghd vs0, v2, v3 +; CHECK-BE-NEXT: xxperm v4, vs1, vs5 +; CHECK-BE-NEXT: stxv vs3, 0(r3) +; CHECK-BE-NEXT: vmrghw v3, v4, v3 +; CHECK-BE-NEXT: xxmrghd vs0, v3, v2 ; CHECK-BE-NEXT: stxv vs0, 16(r3) ; CHECK-BE-NEXT: blr entry: @@ -680,19 +680,19 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mtfprd f0, r3 ; CHECK-BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha -; CHECK-BE-NEXT: xscvspdpn f1, vs0 +; CHECK-BE-NEXT: xscvspdpn f2, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 ; CHECK-BE-NEXT: addi r3, r3, .LCPI4_0@toc@l -; CHECK-BE-NEXT: lxv v2, 0(r3) +; CHECK-BE-NEXT: lxv vs1, 0(r3) ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: mtfprwz f2, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mtvsrwz v2, r3 ; CHECK-BE-NEXT: li r3, 0 -; CHECK-BE-NEXT: vperm v2, v3, v4, v2 +; CHECK-BE-NEXT: xxperm v2, vs2, vs1 ; CHECK-BE-NEXT: vextuwlx r3, r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -760,31 +760,31 @@ ; ; CHECK-BE-LABEL: test4elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-BE-NEXT: xxsldwi vs1, v2, v2, 3 ; CHECK-BE-NEXT: addis r3, r2, .LCPI5_0@toc@ha +; CHECK-BE-NEXT: xxsldwi vs2, v2, v2, 1 ; CHECK-BE-NEXT: addi r3, r3, .LCPI5_0@toc@l -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: lxv v3, 0(r3) -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: xxswapd vs0, v2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: xscvspdpn f0, v2 -; CHECK-BE-NEXT: mtvsrwz v5, r3 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vperm v4, v5, v4, v3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f0 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: xxswapd vs1, v2 +; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mtfprwz f1, r3 +; CHECK-BE-NEXT: xxperm v3, vs1, vs0 +; CHECK-BE-NEXT: xscvspdpn f1, v2 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mtfprwz f1, r3 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: mtvsrwz v2, r3 -; CHECK-BE-NEXT: vperm v2, v5, v2, v3 -; CHECK-BE-NEXT: vmrghw v2, v2, v4 +; CHECK-BE-NEXT: xxperm v2, vs1, vs0 +; CHECK-BE-NEXT: vmrghw v2, v2, v3 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -903,52 +903,52 @@ ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: addis r3, r2, .LCPI6_0@toc@ha ; CHECK-BE-NEXT: addi r3, r3, .LCPI6_0@toc@l -; CHECK-BE-NEXT: lxv v2, 0(r3) -; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: xxswapd vs2, vs1 -; CHECK-BE-NEXT: mtvsrwz v3, r3 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: xscvspdpn f2, vs1 +; CHECK-BE-NEXT: lxv vs2, 0(r3) +; CHECK-BE-NEXT: xxsldwi vs3, vs1, vs1, 3 +; CHECK-BE-NEXT: xscvspdpn f3, vs3 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: mffprwz r3, f3 +; CHECK-BE-NEXT: xxswapd vs3, vs1 +; CHECK-BE-NEXT: mtvsrwz v2, r3 +; CHECK-BE-NEXT: xscvspdpn f3, vs3 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: mffprwz r3, f3 +; CHECK-BE-NEXT: mtfprwz f3, r3 +; CHECK-BE-NEXT: xxperm v2, vs3, vs2 +; CHECK-BE-NEXT: xscvspdpn f3, vs1 ; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vperm v3, v4, v3, v2 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mffprwz r3, f3 +; CHECK-BE-NEXT: mtfprwz f3, r3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: xxperm v3, vs3, vs2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 +; CHECK-BE-NEXT: vmrghw v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mtfprwz f1, r3 +; CHECK-BE-NEXT: xxperm v3, vs1, vs2 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vperm v4, v5, v4, v2 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtfprwz f1, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v0, r3 -; CHECK-BE-NEXT: vperm v2, v5, v0, v2 -; CHECK-BE-NEXT: vmrghw v2, v2, v4 -; CHECK-BE-NEXT: xxmrghd v2, v2, v3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: xxperm v4, vs1, vs2 +; CHECK-BE-NEXT: vmrghw v3, v4, v3 +; CHECK-BE-NEXT: xxmrghd v2, v3, v2 ; CHECK-BE-NEXT: blr entry: %a = load <8 x float>, <8 x float>* %0, align 32 @@ -1163,103 +1163,103 @@ ; CHECK-BE-LABEL: test16elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs2, 16(r4) -; CHECK-BE-NEXT: addis r5, r2, .LCPI7_0@toc@ha -; CHECK-BE-NEXT: lxv vs1, 0(r4) -; CHECK-BE-NEXT: lxv vs0, 48(r4) -; CHECK-BE-NEXT: addi r5, r5, .LCPI7_0@toc@l -; CHECK-BE-NEXT: lxv v2, 0(r5) +; CHECK-BE-NEXT: lxv vs0, 32(r4) +; CHECK-BE-NEXT: lxv vs1, 48(r4) +; CHECK-BE-NEXT: lxv vs4, 0(r4) +; CHECK-BE-NEXT: addis r4, r2, .LCPI7_0@toc@ha +; CHECK-BE-NEXT: addi r4, r4, .LCPI7_0@toc@l +; CHECK-BE-NEXT: lxv vs5, 0(r4) ; CHECK-BE-NEXT: xxsldwi vs3, vs2, vs2, 3 -; CHECK-BE-NEXT: xxswapd vs4, vs2 -; CHECK-BE-NEXT: xscvspdpn f5, vs2 +; CHECK-BE-NEXT: xxswapd vs6, vs2 +; CHECK-BE-NEXT: xscvspdpn f7, vs2 ; CHECK-BE-NEXT: xxsldwi vs2, vs2, vs2, 1 -; CHECK-BE-NEXT: xxsldwi vs6, vs1, vs1, 3 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: xscvspdpn f4, vs4 +; CHECK-BE-NEXT: xscvspdpn f6, vs6 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: xscvdpsxws f6, f6 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r4, f3 +; CHECK-BE-NEXT: mtvsrwz v2, r4 +; CHECK-BE-NEXT: mffprwz r4, f6 +; CHECK-BE-NEXT: xscvdpsxws f6, f7 +; CHECK-BE-NEXT: mtfprwz f3, r4 +; CHECK-BE-NEXT: xxperm v2, vs3, vs5 +; CHECK-BE-NEXT: xxsldwi vs3, vs4, vs4, 3 +; CHECK-BE-NEXT: mffprwz r4, f6 +; CHECK-BE-NEXT: xscvspdpn f3, vs3 +; CHECK-BE-NEXT: mtfprwz f6, r4 +; CHECK-BE-NEXT: mffprwz r4, f2 +; CHECK-BE-NEXT: xxswapd vs2, vs4 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: mtvsrwz v3, r4 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: xxperm v3, vs6, vs5 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r5, f3 +; CHECK-BE-NEXT: vmrghw v2, v3, v2 +; CHECK-BE-NEXT: mffprwz r4, f3 +; CHECK-BE-NEXT: xxsldwi vs3, vs4, vs4, 1 +; CHECK-BE-NEXT: mtvsrwz v4, r4 +; CHECK-BE-NEXT: xscvspdpn f3, vs3 +; CHECK-BE-NEXT: mffprwz r4, f2 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: mtfprwz f2, r4 +; CHECK-BE-NEXT: xxperm v4, vs2, vs5 +; CHECK-BE-NEXT: xscvspdpn f2, vs4 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r4, f2 +; CHECK-BE-NEXT: mtfprwz f2, r4 +; CHECK-BE-NEXT: mffprwz r4, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs1 -; CHECK-BE-NEXT: mtvsrwz v3, r5 -; CHECK-BE-NEXT: mffprwz r5, f4 -; CHECK-BE-NEXT: xscvdpsxws f4, f5 +; CHECK-BE-NEXT: mtvsrwz v3, r4 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: mtvsrwz v4, r5 -; CHECK-BE-NEXT: vperm v3, v4, v3, v2 +; CHECK-BE-NEXT: xxperm v3, vs2, vs5 +; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mffprwz r5, f4 -; CHECK-BE-NEXT: xscvspdpn f4, vs6 -; CHECK-BE-NEXT: mtvsrwz v4, r5 -; CHECK-BE-NEXT: mffprwz r5, f2 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: vmrghw v3, v3, v4 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r4, f2 +; CHECK-BE-NEXT: mtvsrwz v4, r4 +; CHECK-BE-NEXT: mffprwz r4, f3 +; CHECK-BE-NEXT: xxmrghd vs3, v3, v2 +; CHECK-BE-NEXT: mtfprwz f2, r4 +; CHECK-BE-NEXT: xxperm v4, vs2, vs5 ; CHECK-BE-NEXT: xscvspdpn f2, vs1 ; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: mtvsrwz v5, r5 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mffprwz r4, f2 +; CHECK-BE-NEXT: mtfprwz f2, r4 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 +; CHECK-BE-NEXT: mtvsrwz v2, r4 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: xxperm v2, vs2, vs5 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: mffprwz r5, f4 -; CHECK-BE-NEXT: mtvsrwz v5, r5 -; CHECK-BE-NEXT: mffprwz r5, f3 -; CHECK-BE-NEXT: xxsldwi vs3, vs0, vs0, 3 -; CHECK-BE-NEXT: mtvsrwz v0, r5 -; CHECK-BE-NEXT: mffprwz r5, f2 -; CHECK-BE-NEXT: xscvspdpn f2, vs3 -; CHECK-BE-NEXT: vperm v5, v0, v5, v2 -; CHECK-BE-NEXT: mtvsrwz v0, r5 -; CHECK-BE-NEXT: mffprwz r5, f1 +; CHECK-BE-NEXT: vmrghw v2, v2, v4 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mtvsrwz v1, r5 +; CHECK-BE-NEXT: mtvsrwz v3, r4 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: vperm v0, v0, v1, v2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghw v4, v0, v5 -; CHECK-BE-NEXT: mffprwz r5, f2 -; CHECK-BE-NEXT: xscvspdpn f2, vs0 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: mtfprwz f1, r4 +; CHECK-BE-NEXT: xxperm v3, vs1, vs5 +; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: mtvsrwz v1, r5 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r5, f1 -; CHECK-BE-NEXT: lxv vs1, 32(r4) -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrwz v6, r5 -; CHECK-BE-NEXT: vperm v1, v6, v1, v2 -; CHECK-BE-NEXT: mffprwz r4, f2 -; CHECK-BE-NEXT: xxmrghd vs2, v4, v3 -; CHECK-BE-NEXT: mtvsrwz v5, r4 -; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: xxsldwi vs0, vs1, vs1, 3 -; CHECK-BE-NEXT: stxv vs2, 0(r3) -; CHECK-BE-NEXT: mtvsrwz v3, r4 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: vperm v3, v5, v3, v2 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vmrghw v3, v3, v1 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: mtfprwz f1, r4 ; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: xxswapd vs0, vs1 ; CHECK-BE-NEXT: mtvsrwz v4, r4 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: xscvspdpn f0, vs1 -; CHECK-BE-NEXT: mtvsrwz v5, r4 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vperm v4, v5, v4, v2 -; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: xxsldwi vs0, vs1, vs1, 1 -; CHECK-BE-NEXT: mtvsrwz v5, r4 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: mtvsrwz v0, r4 -; CHECK-BE-NEXT: vperm v2, v5, v0, v2 -; CHECK-BE-NEXT: vmrghw v2, v2, v4 -; CHECK-BE-NEXT: xxmrghd vs0, v2, v3 +; CHECK-BE-NEXT: xxperm v4, vs1, vs5 +; CHECK-BE-NEXT: stxv vs3, 0(r3) +; CHECK-BE-NEXT: vmrghw v3, v4, v3 +; CHECK-BE-NEXT: xxmrghd vs0, v3, v2 ; CHECK-BE-NEXT: stxv vs0, 16(r3) ; CHECK-BE-NEXT: blr entry: Index: llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll =================================================================== --- llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll +++ llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll @@ -55,19 +55,19 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mtfprd f0, r3 ; CHECK-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; CHECK-BE-NEXT: xscvspdpn f1, vs0 +; CHECK-BE-NEXT: xscvspdpn f2, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 ; CHECK-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-BE-NEXT: lxv v2, 0(r3) +; CHECK-BE-NEXT: lxv vs1, 0(r3) ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: mtfprwz f2, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mtvsrwz v2, r3 ; CHECK-BE-NEXT: addi r3, r1, -2 -; CHECK-BE-NEXT: vperm v2, v3, v4, v2 +; CHECK-BE-NEXT: xxperm v2, vs2, vs1 ; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10 ; CHECK-BE-NEXT: stxsihx v2, 0, r3 ; CHECK-BE-NEXT: lhz r3, -2(r1) @@ -138,32 +138,32 @@ ; ; CHECK-BE-LABEL: test4elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-BE-NEXT: xxsldwi vs1, v2, v2, 3 ; CHECK-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; CHECK-BE-NEXT: xxsldwi vs2, v2, v2, 1 ; CHECK-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: lxv v3, 0(r3) -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: xxswapd vs0, v2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: xscvspdpn f0, v2 -; CHECK-BE-NEXT: mtvsrwz v5, r3 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vperm v4, v5, v4, v3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f0 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: xxswapd vs1, v2 +; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mtfprwz f1, r3 +; CHECK-BE-NEXT: xxperm v3, vs1, vs0 +; CHECK-BE-NEXT: xscvspdpn f1, v2 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mtfprwz f1, r3 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: mtvsrwz v2, r3 ; CHECK-BE-NEXT: li r3, 0 -; CHECK-BE-NEXT: vperm v2, v5, v2, v3 -; CHECK-BE-NEXT: vmrghh v2, v2, v4 +; CHECK-BE-NEXT: xxperm v2, vs1, vs0 +; CHECK-BE-NEXT: vmrghh v2, v2, v3 ; CHECK-BE-NEXT: vextuwlx r3, r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -285,52 +285,52 @@ ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha ; CHECK-BE-NEXT: addi r3, r3, .LCPI2_0@toc@l -; CHECK-BE-NEXT: lxv v2, 0(r3) -; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: xxswapd vs2, vs1 -; CHECK-BE-NEXT: mtvsrwz v3, r3 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: xscvspdpn f2, vs1 +; CHECK-BE-NEXT: lxv vs2, 0(r3) +; CHECK-BE-NEXT: xxsldwi vs3, vs1, vs1, 3 +; CHECK-BE-NEXT: xscvspdpn f3, vs3 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: mffprwz r3, f3 +; CHECK-BE-NEXT: xxswapd vs3, vs1 +; CHECK-BE-NEXT: mtvsrwz v2, r3 +; CHECK-BE-NEXT: xscvspdpn f3, vs3 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: mffprwz r3, f3 +; CHECK-BE-NEXT: mtfprwz f3, r3 +; CHECK-BE-NEXT: xxperm v2, vs3, vs2 +; CHECK-BE-NEXT: xscvspdpn f3, vs1 ; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vperm v3, v4, v3, v2 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mffprwz r3, f3 +; CHECK-BE-NEXT: mtfprwz f3, r3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: xxperm v3, vs3, vs2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 +; CHECK-BE-NEXT: vmrghh v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mtfprwz f1, r3 +; CHECK-BE-NEXT: xxperm v3, vs1, vs2 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vperm v4, v5, v4, v2 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtfprwz f1, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v0, r3 -; CHECK-BE-NEXT: vperm v2, v5, v0, v2 -; CHECK-BE-NEXT: vmrghh v2, v2, v4 -; CHECK-BE-NEXT: vmrghw v2, v2, v3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: xxperm v4, vs1, vs2 +; CHECK-BE-NEXT: vmrghh v3, v4, v3 +; CHECK-BE-NEXT: vmrghw v2, v3, v2 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -550,98 +550,98 @@ ; CHECK-BE-NEXT: lxv vs2, 32(r3) ; CHECK-BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha ; CHECK-BE-NEXT: addi r3, r3, .LCPI3_0@toc@l -; CHECK-BE-NEXT: lxv v2, 0(r3) -; CHECK-BE-NEXT: xxsldwi vs4, vs3, vs3, 3 -; CHECK-BE-NEXT: xscvspdpn f4, vs4 -; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: xxswapd vs4, vs3 -; CHECK-BE-NEXT: mtvsrwz v3, r3 -; CHECK-BE-NEXT: xscvspdpn f4, vs4 -; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: xscvspdpn f4, vs3 +; CHECK-BE-NEXT: lxv vs4, 0(r3) +; CHECK-BE-NEXT: xxsldwi vs5, vs3, vs3, 3 +; CHECK-BE-NEXT: xscvspdpn f5, vs5 +; CHECK-BE-NEXT: xscvdpsxws f5, f5 +; CHECK-BE-NEXT: mffprwz r3, f5 +; CHECK-BE-NEXT: xxswapd vs5, vs3 +; CHECK-BE-NEXT: mtvsrwz v2, r3 +; CHECK-BE-NEXT: xscvspdpn f5, vs5 +; CHECK-BE-NEXT: xscvdpsxws f5, f5 +; CHECK-BE-NEXT: mffprwz r3, f5 +; CHECK-BE-NEXT: mtfprwz f5, r3 +; CHECK-BE-NEXT: xxperm v2, vs5, vs4 +; CHECK-BE-NEXT: xscvspdpn f5, vs3 ; CHECK-BE-NEXT: xxsldwi vs3, vs3, vs3, 1 -; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: vperm v3, v4, v3, v2 +; CHECK-BE-NEXT: xscvdpsxws f5, f5 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mffprwz r3, f5 +; CHECK-BE-NEXT: mtfprwz f5, r3 ; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xxsldwi vs3, vs2, vs2, 3 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: xxperm v3, vs5, vs4 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 +; CHECK-BE-NEXT: vmrghh v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: mffprwz r3, f3 +; CHECK-BE-NEXT: mtfprwz f3, r3 +; CHECK-BE-NEXT: xxperm v3, vs3, vs4 ; CHECK-BE-NEXT: xscvspdpn f3, vs2 ; CHECK-BE-NEXT: xxsldwi vs2, vs2, vs2, 1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: vperm v4, v5, v4, v2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtfprwz f3, r3 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-BE-NEXT: mtvsrwz v0, r3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: vperm v5, v5, v0, v2 +; CHECK-BE-NEXT: xxperm v4, vs3, vs4 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vmrghh v4, v5, v4 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 +; CHECK-BE-NEXT: vmrghh v3, v4, v3 +; CHECK-BE-NEXT: vmrghw v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs1 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: mtfprwz f2, r3 +; CHECK-BE-NEXT: xxperm v3, vs2, vs4 ; CHECK-BE-NEXT: xscvspdpn f2, vs1 ; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vperm v4, v5, v4, v2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtfprwz f2, r3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-BE-NEXT: mtvsrwz v0, r3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: vperm v5, v5, v0, v2 +; CHECK-BE-NEXT: xxperm v4, vs2, vs4 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghh v4, v5, v4 +; CHECK-BE-NEXT: vmrghh v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mtfprwz f1, r3 +; CHECK-BE-NEXT: xxperm v4, vs1, vs4 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: mtvsrwz v0, r3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vperm v5, v0, v5, v2 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v0, r3 +; CHECK-BE-NEXT: mtfprwz f1, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v1, r3 -; CHECK-BE-NEXT: vperm v2, v0, v1, v2 -; CHECK-BE-NEXT: vmrghh v2, v2, v5 -; CHECK-BE-NEXT: vmrghw v2, v2, v4 -; CHECK-BE-NEXT: xxmrghd v2, v2, v3 +; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: xxperm v5, vs1, vs4 +; CHECK-BE-NEXT: vmrghh v4, v5, v4 +; CHECK-BE-NEXT: vmrghw v3, v4, v3 +; CHECK-BE-NEXT: xxmrghd v2, v3, v2 ; CHECK-BE-NEXT: blr entry: %a = load <16 x float>, <16 x float>* %0, align 64 @@ -695,19 +695,19 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mtfprd f0, r3 ; CHECK-BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha -; CHECK-BE-NEXT: xscvspdpn f1, vs0 +; CHECK-BE-NEXT: xscvspdpn f2, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 ; CHECK-BE-NEXT: addi r3, r3, .LCPI4_0@toc@l -; CHECK-BE-NEXT: lxv v2, 0(r3) +; CHECK-BE-NEXT: lxv vs1, 0(r3) ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: mtfprwz f2, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mtvsrwz v2, r3 ; CHECK-BE-NEXT: addi r3, r1, -2 -; CHECK-BE-NEXT: vperm v2, v3, v4, v2 +; CHECK-BE-NEXT: xxperm v2, vs2, vs1 ; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10 ; CHECK-BE-NEXT: stxsihx v2, 0, r3 ; CHECK-BE-NEXT: lhz r3, -2(r1) @@ -778,32 +778,32 @@ ; ; CHECK-BE-LABEL: test4elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-BE-NEXT: xxsldwi vs1, v2, v2, 3 ; CHECK-BE-NEXT: addis r3, r2, .LCPI5_0@toc@ha +; CHECK-BE-NEXT: xxsldwi vs2, v2, v2, 1 ; CHECK-BE-NEXT: addi r3, r3, .LCPI5_0@toc@l -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: lxv v3, 0(r3) -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: xxswapd vs0, v2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: xscvspdpn f0, v2 -; CHECK-BE-NEXT: mtvsrwz v5, r3 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vperm v4, v5, v4, v3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 -; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f0 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: xxswapd vs1, v2 +; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mtfprwz f1, r3 +; CHECK-BE-NEXT: xxperm v3, vs1, vs0 +; CHECK-BE-NEXT: xscvspdpn f1, v2 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mtfprwz f1, r3 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: mtvsrwz v2, r3 ; CHECK-BE-NEXT: li r3, 0 -; CHECK-BE-NEXT: vperm v2, v5, v2, v3 -; CHECK-BE-NEXT: vmrghh v2, v2, v4 +; CHECK-BE-NEXT: xxperm v2, vs1, vs0 +; CHECK-BE-NEXT: vmrghh v2, v2, v3 ; CHECK-BE-NEXT: vextuwlx r3, r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -925,52 +925,52 @@ ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: addis r3, r2, .LCPI6_0@toc@ha ; CHECK-BE-NEXT: addi r3, r3, .LCPI6_0@toc@l -; CHECK-BE-NEXT: lxv v2, 0(r3) -; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: xxswapd vs2, vs1 -; CHECK-BE-NEXT: mtvsrwz v3, r3 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: xscvspdpn f2, vs1 +; CHECK-BE-NEXT: lxv vs2, 0(r3) +; CHECK-BE-NEXT: xxsldwi vs3, vs1, vs1, 3 +; CHECK-BE-NEXT: xscvspdpn f3, vs3 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: mffprwz r3, f3 +; CHECK-BE-NEXT: xxswapd vs3, vs1 +; CHECK-BE-NEXT: mtvsrwz v2, r3 +; CHECK-BE-NEXT: xscvspdpn f3, vs3 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: mffprwz r3, f3 +; CHECK-BE-NEXT: mtfprwz f3, r3 +; CHECK-BE-NEXT: xxperm v2, vs3, vs2 +; CHECK-BE-NEXT: xscvspdpn f3, vs1 ; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vperm v3, v4, v3, v2 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mffprwz r3, f3 +; CHECK-BE-NEXT: mtfprwz f3, r3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: xxperm v3, vs3, vs2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 +; CHECK-BE-NEXT: vmrghh v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mtfprwz f1, r3 +; CHECK-BE-NEXT: xxperm v3, vs1, vs2 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vperm v4, v5, v4, v2 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtfprwz f1, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v0, r3 -; CHECK-BE-NEXT: vperm v2, v5, v0, v2 -; CHECK-BE-NEXT: vmrghh v2, v2, v4 -; CHECK-BE-NEXT: vmrghw v2, v2, v3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: xxperm v4, vs1, vs2 +; CHECK-BE-NEXT: vmrghh v3, v4, v3 +; CHECK-BE-NEXT: vmrghw v2, v3, v2 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -1190,98 +1190,98 @@ ; CHECK-BE-NEXT: lxv vs2, 32(r3) ; CHECK-BE-NEXT: addis r3, r2, .LCPI7_0@toc@ha ; CHECK-BE-NEXT: addi r3, r3, .LCPI7_0@toc@l -; CHECK-BE-NEXT: lxv v2, 0(r3) -; CHECK-BE-NEXT: xxsldwi vs4, vs3, vs3, 3 -; CHECK-BE-NEXT: xscvspdpn f4, vs4 -; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: xxswapd vs4, vs3 -; CHECK-BE-NEXT: mtvsrwz v3, r3 -; CHECK-BE-NEXT: xscvspdpn f4, vs4 -; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: xscvspdpn f4, vs3 +; CHECK-BE-NEXT: lxv vs4, 0(r3) +; CHECK-BE-NEXT: xxsldwi vs5, vs3, vs3, 3 +; CHECK-BE-NEXT: xscvspdpn f5, vs5 +; CHECK-BE-NEXT: xscvdpsxws f5, f5 +; CHECK-BE-NEXT: mffprwz r3, f5 +; CHECK-BE-NEXT: xxswapd vs5, vs3 +; CHECK-BE-NEXT: mtvsrwz v2, r3 +; CHECK-BE-NEXT: xscvspdpn f5, vs5 +; CHECK-BE-NEXT: xscvdpsxws f5, f5 +; CHECK-BE-NEXT: mffprwz r3, f5 +; CHECK-BE-NEXT: mtfprwz f5, r3 +; CHECK-BE-NEXT: xxperm v2, vs5, vs4 +; CHECK-BE-NEXT: xscvspdpn f5, vs3 ; CHECK-BE-NEXT: xxsldwi vs3, vs3, vs3, 1 -; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: vperm v3, v4, v3, v2 +; CHECK-BE-NEXT: xscvdpsxws f5, f5 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mffprwz r3, f5 +; CHECK-BE-NEXT: mtfprwz f5, r3 ; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xxsldwi vs3, vs2, vs2, 3 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: xxperm v3, vs5, vs4 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 +; CHECK-BE-NEXT: vmrghh v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: mffprwz r3, f3 +; CHECK-BE-NEXT: mtfprwz f3, r3 +; CHECK-BE-NEXT: xxperm v3, vs3, vs4 ; CHECK-BE-NEXT: xscvspdpn f3, vs2 ; CHECK-BE-NEXT: xxsldwi vs2, vs2, vs2, 1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: vperm v4, v5, v4, v2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtfprwz f3, r3 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-BE-NEXT: mtvsrwz v0, r3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: vperm v5, v5, v0, v2 +; CHECK-BE-NEXT: xxperm v4, vs3, vs4 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vmrghh v4, v5, v4 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 +; CHECK-BE-NEXT: vmrghh v3, v4, v3 +; CHECK-BE-NEXT: vmrghw v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs1 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: mtfprwz f2, r3 +; CHECK-BE-NEXT: xxperm v3, vs2, vs4 ; CHECK-BE-NEXT: xscvspdpn f2, vs1 ; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vperm v4, v5, v4, v2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtfprwz f2, r3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-BE-NEXT: mtvsrwz v0, r3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: vperm v5, v5, v0, v2 +; CHECK-BE-NEXT: xxperm v4, vs2, vs4 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghh v4, v5, v4 +; CHECK-BE-NEXT: vmrghh v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mtfprwz f1, r3 +; CHECK-BE-NEXT: xxperm v4, vs1, vs4 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: mtvsrwz v0, r3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vperm v5, v0, v5, v2 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v0, r3 +; CHECK-BE-NEXT: mtfprwz f1, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v1, r3 -; CHECK-BE-NEXT: vperm v2, v0, v1, v2 -; CHECK-BE-NEXT: vmrghh v2, v2, v5 -; CHECK-BE-NEXT: vmrghw v2, v2, v4 -; CHECK-BE-NEXT: xxmrghd v2, v2, v3 +; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: xxperm v5, vs1, vs4 +; CHECK-BE-NEXT: vmrghh v4, v5, v4 +; CHECK-BE-NEXT: vmrghw v3, v4, v3 +; CHECK-BE-NEXT: xxmrghd v2, v3, v2 ; CHECK-BE-NEXT: blr entry: %a = load <16 x float>, <16 x float>* %0, align 64 Index: llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll =================================================================== --- llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll +++ llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll @@ -40,18 +40,18 @@ ; ; CHECK-BE-LABEL: test2elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xscvdpsxws f0, v2 +; CHECK-BE-NEXT: xxswapd vs2, v2 +; CHECK-BE-NEXT: xscvdpsxws f1, v2 ; CHECK-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha ; CHECK-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-BE-NEXT: lxv v3, 0(r3) -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: xxswapd vs0, v2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f0 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mtfprwz f1, r3 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: mtvsrwz v2, r3 ; CHECK-BE-NEXT: li r3, 0 -; CHECK-BE-NEXT: vperm v2, v4, v2, v3 +; CHECK-BE-NEXT: xxperm v2, vs1, vs0 ; CHECK-BE-NEXT: vextuwlx r3, r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -117,24 +117,24 @@ ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha ; CHECK-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l -; CHECK-BE-NEXT: lxv v2, 0(r3) -; CHECK-BE-NEXT: xscvdpsxws f2, f1 +; CHECK-BE-NEXT: lxv vs2, 0(r3) +; CHECK-BE-NEXT: xscvdpsxws f3, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: mffprwz r3, f3 +; CHECK-BE-NEXT: mtfprwz f3, r3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mtvsrwz v2, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vperm v3, v3, v4, v2 +; CHECK-BE-NEXT: xxperm v2, vs3, vs2 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mtfprwz f1, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v5, r3 -; CHECK-BE-NEXT: vperm v2, v4, v5, v2 -; CHECK-BE-NEXT: vmrghw v2, v2, v3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: xxperm v3, vs1, vs2 +; CHECK-BE-NEXT: vmrghw v2, v3, v2 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -242,42 +242,42 @@ ; CHECK-BE-NEXT: lxv vs2, 32(r3) ; CHECK-BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha ; CHECK-BE-NEXT: addi r3, r3, .LCPI2_0@toc@l -; CHECK-BE-NEXT: lxv v2, 0(r3) -; CHECK-BE-NEXT: xscvdpsxws f4, f3 +; CHECK-BE-NEXT: lxv vs4, 0(r3) +; CHECK-BE-NEXT: xscvdpsxws f5, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: mffprwz r3, f5 +; CHECK-BE-NEXT: mtfprwz f5, r3 ; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mtvsrwz v2, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vperm v3, v3, v4, v2 +; CHECK-BE-NEXT: xxperm v2, vs5, vs4 ; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mtfprwz f3, r3 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: xxperm v3, vs3, vs4 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: vmrghw v2, v3, v2 +; CHECK-BE-NEXT: mtfprwz f2, r3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: xxperm v3, vs2, vs4 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtfprwz f1, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v0, r3 -; CHECK-BE-NEXT: vperm v2, v5, v0, v2 -; CHECK-BE-NEXT: vmrghw v2, v2, v4 -; CHECK-BE-NEXT: xxmrghd v2, v2, v3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: xxperm v4, vs1, vs4 +; CHECK-BE-NEXT: vmrghw v3, v4, v3 +; CHECK-BE-NEXT: xxmrghd v2, v3, v2 ; CHECK-BE-NEXT: blr entry: %a = load <8 x double>, <8 x double>* %0, align 64 @@ -463,88 +463,88 @@ ; ; CHECK-BE-LABEL: test16elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r4) -; CHECK-BE-NEXT: lxv vs2, 32(r4) -; CHECK-BE-NEXT: lxv vs1, 16(r4) -; CHECK-BE-NEXT: lxv vs0, 0(r4) -; CHECK-BE-NEXT: addis r5, r2, .LCPI3_0@toc@ha -; CHECK-BE-NEXT: addi r5, r5, .LCPI3_0@toc@l -; CHECK-BE-NEXT: lxv v2, 0(r5) -; CHECK-BE-NEXT: xscvdpsxws f4, f3 -; CHECK-BE-NEXT: xscvdpsxws f5, f2 -; CHECK-BE-NEXT: xscvdpsxws f6, f1 -; CHECK-BE-NEXT: xxswapd vs3, vs3 -; CHECK-BE-NEXT: xscvdpsxws f7, f0 -; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r5, f4 -; CHECK-BE-NEXT: mtvsrwz v3, r5 -; CHECK-BE-NEXT: mffprwz r5, f5 -; CHECK-BE-NEXT: mtvsrwz v4, r5 -; CHECK-BE-NEXT: mffprwz r5, f6 -; CHECK-BE-NEXT: mtvsrwz v5, r5 -; CHECK-BE-NEXT: mffprwz r5, f7 -; CHECK-BE-NEXT: mtvsrwz v0, r5 -; CHECK-BE-NEXT: mffprwz r5, f3 -; CHECK-BE-NEXT: lxv vs3, 112(r4) -; CHECK-BE-NEXT: mtvsrwz v1, r5 -; CHECK-BE-NEXT: mffprwz r5, f2 -; CHECK-BE-NEXT: lxv vs2, 96(r4) -; CHECK-BE-NEXT: vperm v3, v3, v1, v2 -; CHECK-BE-NEXT: mtvsrwz v1, r5 -; CHECK-BE-NEXT: mffprwz r5, f1 +; CHECK-BE-NEXT: lxv vs7, 48(r4) +; CHECK-BE-NEXT: lxv vs0, 64(r4) ; CHECK-BE-NEXT: lxv vs1, 80(r4) -; CHECK-BE-NEXT: xscvdpsxws f4, f3 +; CHECK-BE-NEXT: lxv vs2, 96(r4) +; CHECK-BE-NEXT: xscvdpsxws f9, f7 +; CHECK-BE-NEXT: xxswapd vs7, vs7 +; CHECK-BE-NEXT: lxv vs3, 112(r4) +; CHECK-BE-NEXT: lxv vs4, 0(r4) +; CHECK-BE-NEXT: lxv vs5, 16(r4) +; CHECK-BE-NEXT: lxv vs6, 32(r4) +; CHECK-BE-NEXT: addis r4, r2, .LCPI3_0@toc@ha +; CHECK-BE-NEXT: addi r4, r4, .LCPI3_0@toc@l +; CHECK-BE-NEXT: lxv vs8, 0(r4) +; CHECK-BE-NEXT: xscvdpsxws f7, f7 +; CHECK-BE-NEXT: mffprwz r4, f9 +; CHECK-BE-NEXT: mtfprwz f9, r4 +; CHECK-BE-NEXT: mffprwz r4, f7 +; CHECK-BE-NEXT: xscvdpsxws f7, f6 +; CHECK-BE-NEXT: xxswapd vs6, vs6 +; CHECK-BE-NEXT: mtvsrwz v2, r4 +; CHECK-BE-NEXT: xscvdpsxws f6, f6 +; CHECK-BE-NEXT: xxperm v2, vs9, vs8 +; CHECK-BE-NEXT: mffprwz r4, f7 +; CHECK-BE-NEXT: mtfprwz f7, r4 +; CHECK-BE-NEXT: mffprwz r4, f6 +; CHECK-BE-NEXT: xscvdpsxws f6, f5 +; CHECK-BE-NEXT: xxswapd vs5, vs5 +; CHECK-BE-NEXT: mtvsrwz v3, r4 +; CHECK-BE-NEXT: xscvdpsxws f5, f5 +; CHECK-BE-NEXT: xxperm v3, vs7, vs8 +; CHECK-BE-NEXT: mffprwz r4, f6 +; CHECK-BE-NEXT: vmrghw v2, v3, v2 +; CHECK-BE-NEXT: mtfprwz f6, r4 +; CHECK-BE-NEXT: mffprwz r4, f5 +; CHECK-BE-NEXT: xscvdpsxws f5, f4 +; CHECK-BE-NEXT: xxswapd vs4, vs4 +; CHECK-BE-NEXT: mtvsrwz v4, r4 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: xxperm v4, vs6, vs8 +; CHECK-BE-NEXT: xscvdpsxws f6, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 -; CHECK-BE-NEXT: vperm v4, v4, v1, v2 -; CHECK-BE-NEXT: mtvsrwz v1, r5 -; CHECK-BE-NEXT: mffprwz r5, f0 -; CHECK-BE-NEXT: lxv vs0, 64(r4) ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: vperm v5, v5, v1, v2 -; CHECK-BE-NEXT: mtvsrwz v1, r5 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: vperm v0, v0, v1, v2 +; CHECK-BE-NEXT: mffprwz r4, f5 +; CHECK-BE-NEXT: mtfprwz f5, r4 +; CHECK-BE-NEXT: mffprwz r4, f6 +; CHECK-BE-NEXT: mtfprwz f6, r4 ; CHECK-BE-NEXT: mffprwz r4, f4 -; CHECK-BE-NEXT: vmrghw v5, v0, v5 -; CHECK-BE-NEXT: mtvsrwz v4, r4 +; CHECK-BE-NEXT: mtvsrwz v3, r4 ; CHECK-BE-NEXT: mffprwz r4, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: xxmrghd vs4, v5, v3 -; CHECK-BE-NEXT: mtvsrwz v3, r4 +; CHECK-BE-NEXT: xxperm v3, vs5, vs8 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vperm v3, v4, v3, v2 -; CHECK-BE-NEXT: stxv vs4, 0(r3) +; CHECK-BE-NEXT: vmrghw v3, v3, v4 +; CHECK-BE-NEXT: xxmrghd vs4, v3, v2 +; CHECK-BE-NEXT: mtvsrwz v2, r4 ; CHECK-BE-NEXT: mffprwz r4, f3 -; CHECK-BE-NEXT: mtvsrwz v4, r4 +; CHECK-BE-NEXT: mtfprwz f3, r4 ; CHECK-BE-NEXT: mffprwz r4, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: mtvsrwz v5, r4 +; CHECK-BE-NEXT: xxperm v2, vs6, vs8 +; CHECK-BE-NEXT: mtvsrwz v3, r4 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: xxperm v3, vs3, vs8 ; CHECK-BE-NEXT: mffprwz r4, f2 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: mtvsrwz v4, r4 +; CHECK-BE-NEXT: vmrghw v2, v3, v2 +; CHECK-BE-NEXT: mtfprwz f2, r4 ; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: mtvsrwz v5, r4 +; CHECK-BE-NEXT: mtvsrwz v3, r4 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: xxperm v3, vs2, vs8 ; CHECK-BE-NEXT: mffprwz r4, f1 -; CHECK-BE-NEXT: mtvsrwz v5, r4 +; CHECK-BE-NEXT: mtfprwz f1, r4 ; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: mtvsrwz v0, r4 -; CHECK-BE-NEXT: vperm v2, v5, v0, v2 -; CHECK-BE-NEXT: vmrghw v2, v2, v4 -; CHECK-BE-NEXT: xxmrghd vs0, v2, v3 +; CHECK-BE-NEXT: mtvsrwz v4, r4 +; CHECK-BE-NEXT: xxperm v4, vs1, vs8 +; CHECK-BE-NEXT: stxv vs4, 0(r3) +; CHECK-BE-NEXT: vmrghw v3, v4, v3 +; CHECK-BE-NEXT: xxmrghd vs0, v3, v2 ; CHECK-BE-NEXT: stxv vs0, 16(r3) ; CHECK-BE-NEXT: blr entry: @@ -585,18 +585,18 @@ ; ; CHECK-BE-LABEL: test2elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xscvdpsxws f0, v2 +; CHECK-BE-NEXT: xxswapd vs2, v2 +; CHECK-BE-NEXT: xscvdpsxws f1, v2 ; CHECK-BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha ; CHECK-BE-NEXT: addi r3, r3, .LCPI4_0@toc@l -; CHECK-BE-NEXT: lxv v3, 0(r3) -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: xxswapd vs0, v2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f0 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mtfprwz f1, r3 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: mtvsrwz v2, r3 ; CHECK-BE-NEXT: li r3, 0 -; CHECK-BE-NEXT: vperm v2, v4, v2, v3 +; CHECK-BE-NEXT: xxperm v2, vs1, vs0 ; CHECK-BE-NEXT: vextuwlx r3, r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -662,24 +662,24 @@ ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: addis r3, r2, .LCPI5_0@toc@ha ; CHECK-BE-NEXT: addi r3, r3, .LCPI5_0@toc@l -; CHECK-BE-NEXT: lxv v2, 0(r3) -; CHECK-BE-NEXT: xscvdpsxws f2, f1 +; CHECK-BE-NEXT: lxv vs2, 0(r3) +; CHECK-BE-NEXT: xscvdpsxws f3, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: mffprwz r3, f3 +; CHECK-BE-NEXT: mtfprwz f3, r3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mtvsrwz v2, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vperm v3, v3, v4, v2 +; CHECK-BE-NEXT: xxperm v2, vs3, vs2 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mtfprwz f1, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v5, r3 -; CHECK-BE-NEXT: vperm v2, v4, v5, v2 -; CHECK-BE-NEXT: vmrghw v2, v2, v3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: xxperm v3, vs1, vs2 +; CHECK-BE-NEXT: vmrghw v2, v3, v2 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -787,42 +787,42 @@ ; CHECK-BE-NEXT: lxv vs2, 32(r3) ; CHECK-BE-NEXT: addis r3, r2, .LCPI6_0@toc@ha ; CHECK-BE-NEXT: addi r3, r3, .LCPI6_0@toc@l -; CHECK-BE-NEXT: lxv v2, 0(r3) -; CHECK-BE-NEXT: xscvdpsxws f4, f3 +; CHECK-BE-NEXT: lxv vs4, 0(r3) +; CHECK-BE-NEXT: xscvdpsxws f5, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: mffprwz r3, f5 +; CHECK-BE-NEXT: mtfprwz f5, r3 ; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mtvsrwz v2, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vperm v3, v3, v4, v2 +; CHECK-BE-NEXT: xxperm v2, vs5, vs4 ; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mtfprwz f3, r3 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: xxperm v3, vs3, vs4 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: vmrghw v2, v3, v2 +; CHECK-BE-NEXT: mtfprwz f2, r3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: xxperm v3, vs2, vs4 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtfprwz f1, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v0, r3 -; CHECK-BE-NEXT: vperm v2, v5, v0, v2 -; CHECK-BE-NEXT: vmrghw v2, v2, v4 -; CHECK-BE-NEXT: xxmrghd v2, v2, v3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: xxperm v4, vs1, vs4 +; CHECK-BE-NEXT: vmrghw v3, v4, v3 +; CHECK-BE-NEXT: xxmrghd v2, v3, v2 ; CHECK-BE-NEXT: blr entry: %a = load <8 x double>, <8 x double>* %0, align 64 @@ -1008,88 +1008,88 @@ ; ; CHECK-BE-LABEL: test16elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs3, 48(r4) -; CHECK-BE-NEXT: lxv vs2, 32(r4) -; CHECK-BE-NEXT: lxv vs1, 16(r4) -; CHECK-BE-NEXT: lxv vs0, 0(r4) -; CHECK-BE-NEXT: addis r5, r2, .LCPI7_0@toc@ha -; CHECK-BE-NEXT: addi r5, r5, .LCPI7_0@toc@l -; CHECK-BE-NEXT: lxv v2, 0(r5) -; CHECK-BE-NEXT: xscvdpsxws f4, f3 -; CHECK-BE-NEXT: xscvdpsxws f5, f2 -; CHECK-BE-NEXT: xscvdpsxws f6, f1 -; CHECK-BE-NEXT: xxswapd vs3, vs3 -; CHECK-BE-NEXT: xscvdpsxws f7, f0 -; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r5, f4 -; CHECK-BE-NEXT: mtvsrwz v3, r5 -; CHECK-BE-NEXT: mffprwz r5, f5 -; CHECK-BE-NEXT: mtvsrwz v4, r5 -; CHECK-BE-NEXT: mffprwz r5, f6 -; CHECK-BE-NEXT: mtvsrwz v5, r5 -; CHECK-BE-NEXT: mffprwz r5, f7 -; CHECK-BE-NEXT: mtvsrwz v0, r5 -; CHECK-BE-NEXT: mffprwz r5, f3 -; CHECK-BE-NEXT: lxv vs3, 112(r4) -; CHECK-BE-NEXT: mtvsrwz v1, r5 -; CHECK-BE-NEXT: mffprwz r5, f2 -; CHECK-BE-NEXT: lxv vs2, 96(r4) -; CHECK-BE-NEXT: vperm v3, v3, v1, v2 -; CHECK-BE-NEXT: mtvsrwz v1, r5 -; CHECK-BE-NEXT: mffprwz r5, f1 +; CHECK-BE-NEXT: lxv vs7, 48(r4) +; CHECK-BE-NEXT: lxv vs0, 64(r4) ; CHECK-BE-NEXT: lxv vs1, 80(r4) -; CHECK-BE-NEXT: xscvdpsxws f4, f3 +; CHECK-BE-NEXT: lxv vs2, 96(r4) +; CHECK-BE-NEXT: xscvdpsxws f9, f7 +; CHECK-BE-NEXT: xxswapd vs7, vs7 +; CHECK-BE-NEXT: lxv vs3, 112(r4) +; CHECK-BE-NEXT: lxv vs4, 0(r4) +; CHECK-BE-NEXT: lxv vs5, 16(r4) +; CHECK-BE-NEXT: lxv vs6, 32(r4) +; CHECK-BE-NEXT: addis r4, r2, .LCPI7_0@toc@ha +; CHECK-BE-NEXT: addi r4, r4, .LCPI7_0@toc@l +; CHECK-BE-NEXT: lxv vs8, 0(r4) +; CHECK-BE-NEXT: xscvdpsxws f7, f7 +; CHECK-BE-NEXT: mffprwz r4, f9 +; CHECK-BE-NEXT: mtfprwz f9, r4 +; CHECK-BE-NEXT: mffprwz r4, f7 +; CHECK-BE-NEXT: xscvdpsxws f7, f6 +; CHECK-BE-NEXT: xxswapd vs6, vs6 +; CHECK-BE-NEXT: mtvsrwz v2, r4 +; CHECK-BE-NEXT: xscvdpsxws f6, f6 +; CHECK-BE-NEXT: xxperm v2, vs9, vs8 +; CHECK-BE-NEXT: mffprwz r4, f7 +; CHECK-BE-NEXT: mtfprwz f7, r4 +; CHECK-BE-NEXT: mffprwz r4, f6 +; CHECK-BE-NEXT: xscvdpsxws f6, f5 +; CHECK-BE-NEXT: xxswapd vs5, vs5 +; CHECK-BE-NEXT: mtvsrwz v3, r4 +; CHECK-BE-NEXT: xscvdpsxws f5, f5 +; CHECK-BE-NEXT: xxperm v3, vs7, vs8 +; CHECK-BE-NEXT: mffprwz r4, f6 +; CHECK-BE-NEXT: vmrghw v2, v3, v2 +; CHECK-BE-NEXT: mtfprwz f6, r4 +; CHECK-BE-NEXT: mffprwz r4, f5 +; CHECK-BE-NEXT: xscvdpsxws f5, f4 +; CHECK-BE-NEXT: xxswapd vs4, vs4 +; CHECK-BE-NEXT: mtvsrwz v4, r4 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: xxperm v4, vs6, vs8 +; CHECK-BE-NEXT: xscvdpsxws f6, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 -; CHECK-BE-NEXT: vperm v4, v4, v1, v2 -; CHECK-BE-NEXT: mtvsrwz v1, r5 -; CHECK-BE-NEXT: mffprwz r5, f0 -; CHECK-BE-NEXT: lxv vs0, 64(r4) ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: vperm v5, v5, v1, v2 -; CHECK-BE-NEXT: mtvsrwz v1, r5 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: vperm v0, v0, v1, v2 +; CHECK-BE-NEXT: mffprwz r4, f5 +; CHECK-BE-NEXT: mtfprwz f5, r4 +; CHECK-BE-NEXT: mffprwz r4, f6 +; CHECK-BE-NEXT: mtfprwz f6, r4 ; CHECK-BE-NEXT: mffprwz r4, f4 -; CHECK-BE-NEXT: vmrghw v5, v0, v5 -; CHECK-BE-NEXT: mtvsrwz v4, r4 +; CHECK-BE-NEXT: mtvsrwz v3, r4 ; CHECK-BE-NEXT: mffprwz r4, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: xxmrghd vs4, v5, v3 -; CHECK-BE-NEXT: mtvsrwz v3, r4 +; CHECK-BE-NEXT: xxperm v3, vs5, vs8 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vperm v3, v4, v3, v2 -; CHECK-BE-NEXT: stxv vs4, 0(r3) +; CHECK-BE-NEXT: vmrghw v3, v3, v4 +; CHECK-BE-NEXT: xxmrghd vs4, v3, v2 +; CHECK-BE-NEXT: mtvsrwz v2, r4 ; CHECK-BE-NEXT: mffprwz r4, f3 -; CHECK-BE-NEXT: mtvsrwz v4, r4 +; CHECK-BE-NEXT: mtfprwz f3, r4 ; CHECK-BE-NEXT: mffprwz r4, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: mtvsrwz v5, r4 +; CHECK-BE-NEXT: xxperm v2, vs6, vs8 +; CHECK-BE-NEXT: mtvsrwz v3, r4 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: xxperm v3, vs3, vs8 ; CHECK-BE-NEXT: mffprwz r4, f2 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: mtvsrwz v4, r4 +; CHECK-BE-NEXT: vmrghw v2, v3, v2 +; CHECK-BE-NEXT: mtfprwz f2, r4 ; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: mtvsrwz v5, r4 +; CHECK-BE-NEXT: mtvsrwz v3, r4 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: xxperm v3, vs2, vs8 ; CHECK-BE-NEXT: mffprwz r4, f1 -; CHECK-BE-NEXT: mtvsrwz v5, r4 +; CHECK-BE-NEXT: mtfprwz f1, r4 ; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: mtvsrwz v0, r4 -; CHECK-BE-NEXT: vperm v2, v5, v0, v2 -; CHECK-BE-NEXT: vmrghw v2, v2, v4 -; CHECK-BE-NEXT: xxmrghd vs0, v2, v3 +; CHECK-BE-NEXT: mtvsrwz v4, r4 +; CHECK-BE-NEXT: xxperm v4, vs1, vs8 +; CHECK-BE-NEXT: stxv vs4, 0(r3) +; CHECK-BE-NEXT: vmrghw v3, v4, v3 +; CHECK-BE-NEXT: xxmrghd vs0, v3, v2 ; CHECK-BE-NEXT: stxv vs0, 16(r3) ; CHECK-BE-NEXT: blr entry: Index: llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll =================================================================== --- llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll +++ llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll @@ -45,18 +45,18 @@ ; ; CHECK-BE-LABEL: test2elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xscvdpsxws f0, v2 +; CHECK-BE-NEXT: xxswapd vs2, v2 +; CHECK-BE-NEXT: xscvdpsxws f1, v2 ; CHECK-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha ; CHECK-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-BE-NEXT: lxv v3, 0(r3) -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: xxswapd vs0, v2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f0 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mtfprwz f1, r3 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: mtvsrwz v2, r3 ; CHECK-BE-NEXT: addi r3, r1, -2 -; CHECK-BE-NEXT: vperm v2, v4, v2, v3 +; CHECK-BE-NEXT: xxperm v2, vs1, vs0 ; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10 ; CHECK-BE-NEXT: stxsihx v2, 0, r3 ; CHECK-BE-NEXT: lhz r3, -2(r1) @@ -125,25 +125,25 @@ ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha ; CHECK-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l -; CHECK-BE-NEXT: lxv v2, 0(r3) -; CHECK-BE-NEXT: xscvdpsxws f2, f1 +; CHECK-BE-NEXT: lxv vs2, 0(r3) +; CHECK-BE-NEXT: xscvdpsxws f3, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: mffprwz r3, f3 +; CHECK-BE-NEXT: mtfprwz f3, r3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mtvsrwz v2, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vperm v3, v3, v4, v2 +; CHECK-BE-NEXT: xxperm v2, vs3, vs2 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mtfprwz f1, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: li r3, 0 -; CHECK-BE-NEXT: vperm v2, v4, v5, v2 -; CHECK-BE-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-NEXT: xxperm v3, vs1, vs2 +; CHECK-BE-NEXT: vmrghh v2, v3, v2 ; CHECK-BE-NEXT: vextuwlx r3, r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -254,42 +254,42 @@ ; CHECK-BE-NEXT: lxv vs2, 32(r3) ; CHECK-BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha ; CHECK-BE-NEXT: addi r3, r3, .LCPI2_0@toc@l -; CHECK-BE-NEXT: lxv v2, 0(r3) -; CHECK-BE-NEXT: xscvdpsxws f4, f3 +; CHECK-BE-NEXT: lxv vs4, 0(r3) +; CHECK-BE-NEXT: xscvdpsxws f5, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: mffprwz r3, f5 +; CHECK-BE-NEXT: mtfprwz f5, r3 ; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mtvsrwz v2, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vperm v3, v3, v4, v2 +; CHECK-BE-NEXT: xxperm v2, vs5, vs4 ; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mtfprwz f3, r3 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: xxperm v3, vs3, vs4 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-NEXT: mtfprwz f2, r3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: xxperm v3, vs2, vs4 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtfprwz f1, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v0, r3 -; CHECK-BE-NEXT: vperm v2, v5, v0, v2 -; CHECK-BE-NEXT: vmrghh v2, v2, v4 -; CHECK-BE-NEXT: vmrghw v2, v2, v3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: xxperm v4, vs1, vs4 +; CHECK-BE-NEXT: vmrghh v3, v4, v3 +; CHECK-BE-NEXT: vmrghw v2, v3, v2 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -479,7 +479,7 @@ ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: xscvdpsxws f8, f7 +; CHECK-BE-NEXT: xscvdpsxws f9, f7 ; CHECK-BE-NEXT: xxswapd vs7, vs7 ; CHECK-BE-NEXT: lxv vs3, 48(r3) ; CHECK-BE-NEXT: lxv vs4, 64(r3) @@ -487,76 +487,76 @@ ; CHECK-BE-NEXT: lxv vs6, 96(r3) ; CHECK-BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha ; CHECK-BE-NEXT: addi r3, r3, .LCPI3_0@toc@l -; CHECK-BE-NEXT: lxv v2, 0(r3) +; CHECK-BE-NEXT: lxv vs8, 0(r3) ; CHECK-BE-NEXT: xscvdpsxws f7, f7 -; CHECK-BE-NEXT: mffprwz r3, f8 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: mffprwz r3, f9 +; CHECK-BE-NEXT: mtfprwz f9, r3 ; CHECK-BE-NEXT: mffprwz r3, f7 ; CHECK-BE-NEXT: xscvdpsxws f7, f6 ; CHECK-BE-NEXT: xxswapd vs6, vs6 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mtvsrwz v2, r3 ; CHECK-BE-NEXT: xscvdpsxws f6, f6 -; CHECK-BE-NEXT: vperm v3, v3, v4, v2 +; CHECK-BE-NEXT: xxperm v2, vs9, vs8 ; CHECK-BE-NEXT: mffprwz r3, f7 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mtfprwz f7, r3 ; CHECK-BE-NEXT: mffprwz r3, f6 ; CHECK-BE-NEXT: xscvdpsxws f6, f5 ; CHECK-BE-NEXT: xxswapd vs5, vs5 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f5, f5 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: xxperm v3, vs7, vs8 ; CHECK-BE-NEXT: mffprwz r3, f6 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-NEXT: mtfprwz f6, r3 ; CHECK-BE-NEXT: mffprwz r3, f5 ; CHECK-BE-NEXT: xscvdpsxws f5, f4 ; CHECK-BE-NEXT: xxswapd vs4, vs4 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: xxperm v3, vs6, vs8 ; CHECK-BE-NEXT: mffprwz r3, f5 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtfprwz f5, r3 ; CHECK-BE-NEXT: mffprwz r3, f4 ; CHECK-BE-NEXT: xscvdpsxws f4, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 -; CHECK-BE-NEXT: mtvsrwz v0, r3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: vperm v5, v5, v0, v2 -; CHECK-BE-NEXT: vmrghh v4, v5, v4 +; CHECK-BE-NEXT: xxperm v4, vs5, vs8 ; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: vmrghh v3, v4, v3 +; CHECK-BE-NEXT: mtfprwz f4, r3 ; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: vmrghw v2, v3, v2 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: xxperm v3, vs4, vs8 ; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtfprwz f3, r3 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: mtvsrwz v0, r3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vperm v5, v5, v0, v2 +; CHECK-BE-NEXT: xxperm v4, vs3, vs8 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: vmrghh v4, v5, v4 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: vmrghh v3, v4, v3 +; CHECK-BE-NEXT: mtfprwz f2, r3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: mtvsrwz v0, r3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vperm v5, v5, v0, v2 +; CHECK-BE-NEXT: xxperm v4, vs2, vs8 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v0, r3 +; CHECK-BE-NEXT: mtfprwz f1, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v1, r3 -; CHECK-BE-NEXT: vperm v2, v0, v1, v2 -; CHECK-BE-NEXT: vmrghh v2, v2, v5 -; CHECK-BE-NEXT: vmrghw v2, v2, v4 -; CHECK-BE-NEXT: xxmrghd v2, v2, v3 +; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: xxperm v5, vs1, vs8 +; CHECK-BE-NEXT: vmrghh v4, v5, v4 +; CHECK-BE-NEXT: vmrghw v3, v4, v3 +; CHECK-BE-NEXT: xxmrghd v2, v3, v2 ; CHECK-BE-NEXT: blr entry: %a = load <16 x double>, <16 x double>* %0, align 128 @@ -600,18 +600,18 @@ ; ; CHECK-BE-LABEL: test2elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xscvdpsxws f0, v2 +; CHECK-BE-NEXT: xxswapd vs2, v2 +; CHECK-BE-NEXT: xscvdpsxws f1, v2 ; CHECK-BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha ; CHECK-BE-NEXT: addi r3, r3, .LCPI4_0@toc@l -; CHECK-BE-NEXT: lxv v3, 0(r3) -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: xxswapd vs0, v2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f0 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mtfprwz f1, r3 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: mtvsrwz v2, r3 ; CHECK-BE-NEXT: addi r3, r1, -2 -; CHECK-BE-NEXT: vperm v2, v4, v2, v3 +; CHECK-BE-NEXT: xxperm v2, vs1, vs0 ; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10 ; CHECK-BE-NEXT: stxsihx v2, 0, r3 ; CHECK-BE-NEXT: lhz r3, -2(r1) @@ -680,25 +680,25 @@ ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: addis r3, r2, .LCPI5_0@toc@ha ; CHECK-BE-NEXT: addi r3, r3, .LCPI5_0@toc@l -; CHECK-BE-NEXT: lxv v2, 0(r3) -; CHECK-BE-NEXT: xscvdpsxws f2, f1 +; CHECK-BE-NEXT: lxv vs2, 0(r3) +; CHECK-BE-NEXT: xscvdpsxws f3, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: mffprwz r3, f3 +; CHECK-BE-NEXT: mtfprwz f3, r3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mtvsrwz v2, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vperm v3, v3, v4, v2 +; CHECK-BE-NEXT: xxperm v2, vs3, vs2 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mtfprwz f1, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: li r3, 0 -; CHECK-BE-NEXT: vperm v2, v4, v5, v2 -; CHECK-BE-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-NEXT: xxperm v3, vs1, vs2 +; CHECK-BE-NEXT: vmrghh v2, v3, v2 ; CHECK-BE-NEXT: vextuwlx r3, r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -809,42 +809,42 @@ ; CHECK-BE-NEXT: lxv vs2, 32(r3) ; CHECK-BE-NEXT: addis r3, r2, .LCPI6_0@toc@ha ; CHECK-BE-NEXT: addi r3, r3, .LCPI6_0@toc@l -; CHECK-BE-NEXT: lxv v2, 0(r3) -; CHECK-BE-NEXT: xscvdpsxws f4, f3 +; CHECK-BE-NEXT: lxv vs4, 0(r3) +; CHECK-BE-NEXT: xscvdpsxws f5, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: mffprwz r3, f5 +; CHECK-BE-NEXT: mtfprwz f5, r3 ; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mtvsrwz v2, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vperm v3, v3, v4, v2 +; CHECK-BE-NEXT: xxperm v2, vs5, vs4 ; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mtfprwz f3, r3 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: xxperm v3, vs3, vs4 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-NEXT: mtfprwz f2, r3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: xxperm v3, vs2, vs4 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtfprwz f1, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v0, r3 -; CHECK-BE-NEXT: vperm v2, v5, v0, v2 -; CHECK-BE-NEXT: vmrghh v2, v2, v4 -; CHECK-BE-NEXT: vmrghw v2, v2, v3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: xxperm v4, vs1, vs4 +; CHECK-BE-NEXT: vmrghh v3, v4, v3 +; CHECK-BE-NEXT: vmrghw v2, v3, v2 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -1034,7 +1034,7 @@ ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: xscvdpsxws f8, f7 +; CHECK-BE-NEXT: xscvdpsxws f9, f7 ; CHECK-BE-NEXT: xxswapd vs7, vs7 ; CHECK-BE-NEXT: lxv vs3, 48(r3) ; CHECK-BE-NEXT: lxv vs4, 64(r3) @@ -1042,76 +1042,76 @@ ; CHECK-BE-NEXT: lxv vs6, 96(r3) ; CHECK-BE-NEXT: addis r3, r2, .LCPI7_0@toc@ha ; CHECK-BE-NEXT: addi r3, r3, .LCPI7_0@toc@l -; CHECK-BE-NEXT: lxv v2, 0(r3) +; CHECK-BE-NEXT: lxv vs8, 0(r3) ; CHECK-BE-NEXT: xscvdpsxws f7, f7 -; CHECK-BE-NEXT: mffprwz r3, f8 -; CHECK-BE-NEXT: mtvsrwz v3, r3 +; CHECK-BE-NEXT: mffprwz r3, f9 +; CHECK-BE-NEXT: mtfprwz f9, r3 ; CHECK-BE-NEXT: mffprwz r3, f7 ; CHECK-BE-NEXT: xscvdpsxws f7, f6 ; CHECK-BE-NEXT: xxswapd vs6, vs6 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mtvsrwz v2, r3 ; CHECK-BE-NEXT: xscvdpsxws f6, f6 -; CHECK-BE-NEXT: vperm v3, v3, v4, v2 +; CHECK-BE-NEXT: xxperm v2, vs9, vs8 ; CHECK-BE-NEXT: mffprwz r3, f7 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: mtfprwz f7, r3 ; CHECK-BE-NEXT: mffprwz r3, f6 ; CHECK-BE-NEXT: xscvdpsxws f6, f5 ; CHECK-BE-NEXT: xxswapd vs5, vs5 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f5, f5 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: xxperm v3, vs7, vs8 ; CHECK-BE-NEXT: mffprwz r3, f6 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-NEXT: mtfprwz f6, r3 ; CHECK-BE-NEXT: mffprwz r3, f5 ; CHECK-BE-NEXT: xscvdpsxws f5, f4 ; CHECK-BE-NEXT: xxswapd vs4, vs4 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: xxperm v3, vs6, vs8 ; CHECK-BE-NEXT: mffprwz r3, f5 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtfprwz f5, r3 ; CHECK-BE-NEXT: mffprwz r3, f4 ; CHECK-BE-NEXT: xscvdpsxws f4, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 -; CHECK-BE-NEXT: mtvsrwz v0, r3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: vperm v5, v5, v0, v2 -; CHECK-BE-NEXT: vmrghh v4, v5, v4 +; CHECK-BE-NEXT: xxperm v4, vs5, vs8 ; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: mtvsrwz v4, r3 +; CHECK-BE-NEXT: vmrghh v3, v4, v3 +; CHECK-BE-NEXT: mtfprwz f4, r3 ; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: vmrghw v2, v3, v2 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: xxperm v3, vs4, vs8 ; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: mtfprwz f3, r3 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: mtvsrwz v0, r3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vperm v5, v5, v0, v2 +; CHECK-BE-NEXT: xxperm v4, vs3, vs8 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: vmrghh v4, v5, v4 -; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: vmrghh v3, v4, v3 +; CHECK-BE-NEXT: mtfprwz f2, r3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: mtvsrwz v0, r3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vperm v5, v5, v0, v2 +; CHECK-BE-NEXT: xxperm v4, vs2, vs8 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: mtvsrwz v0, r3 +; CHECK-BE-NEXT: mtfprwz f1, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: mtvsrwz v1, r3 -; CHECK-BE-NEXT: vperm v2, v0, v1, v2 -; CHECK-BE-NEXT: vmrghh v2, v2, v5 -; CHECK-BE-NEXT: vmrghw v2, v2, v4 -; CHECK-BE-NEXT: xxmrghd v2, v2, v3 +; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: xxperm v5, vs1, vs8 +; CHECK-BE-NEXT: vmrghh v4, v5, v4 +; CHECK-BE-NEXT: vmrghw v3, v4, v3 +; CHECK-BE-NEXT: xxmrghd v2, v3, v2 ; CHECK-BE-NEXT: blr entry: %a = load <16 x double>, <16 x double>* %0, align 128 Index: llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll =================================================================== --- llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll +++ llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll @@ -80,13 +80,13 @@ ; ; CHECK-BE-LABEL: test4elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mtvsrd v2, r3 +; CHECK-BE-NEXT: mtfprd f1, r3 ; CHECK-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha -; CHECK-BE-NEXT: xxlxor v4, v4, v4 +; CHECK-BE-NEXT: xxlxor vs0, vs0, vs0 ; CHECK-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l -; CHECK-BE-NEXT: lxv v3, 0(r3) -; CHECK-BE-NEXT: vperm v2, v4, v2, v3 -; CHECK-BE-NEXT: xvcvuxwsp v2, v2 +; CHECK-BE-NEXT: lxv vs2, 0(r3) +; CHECK-BE-NEXT: xxperm vs1, vs0, vs2 +; CHECK-BE-NEXT: xvcvuxwsp v2, vs1 ; CHECK-BE-NEXT: blr entry: %0 = bitcast i64 %a.coerce to <4 x i16> Index: llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll =================================================================== --- llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll +++ llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll @@ -23,24 +23,24 @@ ; ; CHECK-P9-LABEL: test2elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtvsrwz v2, r3 +; CHECK-P9-NEXT: mtfprwz f1, r3 ; CHECK-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; CHECK-P9-NEXT: xxlxor v4, v4, v4 +; CHECK-P9-NEXT: xxlxor vs0, vs0, vs0 ; CHECK-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-P9-NEXT: lxv v3, 0(r3) -; CHECK-P9-NEXT: vperm v2, v4, v2, v3 -; CHECK-P9-NEXT: xvcvuxddp v2, v2 +; CHECK-P9-NEXT: lxv vs2, 0(r3) +; CHECK-P9-NEXT: xxperm vs1, vs0, vs2 +; CHECK-P9-NEXT: xvcvuxddp v2, vs1 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test2elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mtvsrwz v2, r3 +; CHECK-BE-NEXT: mtfprwz f1, r3 ; CHECK-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; CHECK-BE-NEXT: xxlxor v4, v4, v4 +; CHECK-BE-NEXT: xxlxor vs0, vs0, vs0 ; CHECK-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-BE-NEXT: lxv v3, 0(r3) -; CHECK-BE-NEXT: vperm v2, v4, v2, v3 -; CHECK-BE-NEXT: xvcvuxddp v2, v2 +; CHECK-BE-NEXT: lxv vs2, 0(r3) +; CHECK-BE-NEXT: xxperm vs1, vs0, vs2 +; CHECK-BE-NEXT: xvcvuxddp v2, vs1 ; CHECK-BE-NEXT: blr entry: %0 = bitcast i32 %a.coerce to <2 x i16> @@ -385,8 +385,8 @@ ; CHECK-P9-NEXT: mtvsrwz v2, r3 ; CHECK-P9-NEXT: addis r3, r2, .LCPI4_0@toc@ha ; CHECK-P9-NEXT: addi r3, r3, .LCPI4_0@toc@l -; CHECK-P9-NEXT: lxv v3, 0(r3) -; CHECK-P9-NEXT: vperm v2, v2, v2, v3 +; CHECK-P9-NEXT: lxv vs0, 0(r3) +; CHECK-P9-NEXT: xxperm v2, v2, vs0 ; CHECK-P9-NEXT: vextsh2d v2, v2 ; CHECK-P9-NEXT: xvcvsxddp v2, v2 ; CHECK-P9-NEXT: blr @@ -396,8 +396,8 @@ ; CHECK-BE-NEXT: mtvsrwz v2, r3 ; CHECK-BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha ; CHECK-BE-NEXT: addi r3, r3, .LCPI4_0@toc@l -; CHECK-BE-NEXT: lxv v3, 0(r3) -; CHECK-BE-NEXT: vperm v2, v2, v2, v3 +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: xxperm v2, v2, vs0 ; CHECK-BE-NEXT: vextsh2d v2, v2 ; CHECK-BE-NEXT: xvcvsxddp v2, v2 ; CHECK-BE-NEXT: blr @@ -459,19 +459,20 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mtvsrd v2, r4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI5_0@toc@ha -; CHECK-BE-NEXT: xxlxor v3, v3, v3 +; CHECK-BE-NEXT: xxlxor vs0, vs0, vs0 ; CHECK-BE-NEXT: addi r4, r4, .LCPI5_0@toc@l -; CHECK-BE-NEXT: lxv v4, 0(r4) +; CHECK-BE-NEXT: vmr v3, v2 +; CHECK-BE-NEXT: lxv vs1, 0(r4) ; CHECK-BE-NEXT: addis r4, r2, .LCPI5_1@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI5_1@toc@l -; CHECK-BE-NEXT: vperm v3, v3, v2, v4 +; CHECK-BE-NEXT: xxperm v3, vs0, vs1 +; CHECK-BE-NEXT: lxv vs1, 0(r4) ; CHECK-BE-NEXT: vextsh2d v3, v3 +; CHECK-BE-NEXT: xxperm v2, v2, vs1 ; CHECK-BE-NEXT: xvcvsxddp vs0, v3 -; CHECK-BE-NEXT: lxv v3, 0(r4) -; CHECK-BE-NEXT: vperm v2, v2, v2, v3 -; CHECK-BE-NEXT: stxv vs0, 16(r3) ; CHECK-BE-NEXT: vextsh2d v2, v2 ; CHECK-BE-NEXT: xvcvsxddp vs1, v2 +; CHECK-BE-NEXT: stxv vs0, 16(r3) ; CHECK-BE-NEXT: stxv vs1, 0(r3) ; CHECK-BE-NEXT: blr entry: Index: llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll =================================================================== --- llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll +++ llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll @@ -75,24 +75,24 @@ ; ; CHECK-P9-LABEL: test4elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtvsrwz v2, r3 +; CHECK-P9-NEXT: mtfprwz f1, r3 ; CHECK-P9-NEXT: addis r3, r2, .LCPI1_0@toc@ha -; CHECK-P9-NEXT: xxlxor v4, v4, v4 +; CHECK-P9-NEXT: xxlxor vs0, vs0, vs0 ; CHECK-P9-NEXT: addi r3, r3, .LCPI1_0@toc@l -; CHECK-P9-NEXT: lxv v3, 0(r3) -; CHECK-P9-NEXT: vperm v2, v4, v2, v3 -; CHECK-P9-NEXT: xvcvuxwsp v2, v2 +; CHECK-P9-NEXT: lxv vs2, 0(r3) +; CHECK-P9-NEXT: xxperm vs1, vs0, vs2 +; CHECK-P9-NEXT: xvcvuxwsp v2, vs1 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test4elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mtvsrwz v2, r3 +; CHECK-BE-NEXT: mtfprwz f1, r3 ; CHECK-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha -; CHECK-BE-NEXT: xxlxor v4, v4, v4 +; CHECK-BE-NEXT: xxlxor vs0, vs0, vs0 ; CHECK-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l -; CHECK-BE-NEXT: lxv v3, 0(r3) -; CHECK-BE-NEXT: vperm v2, v4, v2, v3 -; CHECK-BE-NEXT: xvcvuxwsp v2, v2 +; CHECK-BE-NEXT: lxv vs2, 0(r3) +; CHECK-BE-NEXT: xxperm vs1, vs0, vs2 +; CHECK-BE-NEXT: xvcvuxwsp v2, vs1 ; CHECK-BE-NEXT: blr entry: %0 = bitcast i32 %a.coerce to <4 x i8> @@ -334,8 +334,8 @@ ; CHECK-P9-NEXT: mtvsrwz v2, r3 ; CHECK-P9-NEXT: addis r3, r2, .LCPI5_0@toc@ha ; CHECK-P9-NEXT: addi r3, r3, .LCPI5_0@toc@l -; CHECK-P9-NEXT: lxv v3, 0(r3) -; CHECK-P9-NEXT: vperm v2, v2, v2, v3 +; CHECK-P9-NEXT: lxv vs0, 0(r3) +; CHECK-P9-NEXT: xxperm v2, v2, vs0 ; CHECK-P9-NEXT: vextsb2w v2, v2 ; CHECK-P9-NEXT: xvcvsxwsp v2, v2 ; CHECK-P9-NEXT: blr @@ -345,8 +345,8 @@ ; CHECK-BE-NEXT: mtvsrwz v2, r3 ; CHECK-BE-NEXT: addis r3, r2, .LCPI5_0@toc@ha ; CHECK-BE-NEXT: addi r3, r3, .LCPI5_0@toc@l -; CHECK-BE-NEXT: lxv v3, 0(r3) -; CHECK-BE-NEXT: vperm v2, v2, v2, v3 +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: xxperm v2, v2, vs0 ; CHECK-BE-NEXT: vextsb2w v2, v2 ; CHECK-BE-NEXT: xvcvsxwsp v2, v2 ; CHECK-BE-NEXT: blr @@ -404,18 +404,18 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mtvsrd v2, r4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI6_0@toc@ha -; CHECK-BE-NEXT: xxlxor v3, v3, v3 +; CHECK-BE-NEXT: xxlxor vs0, vs0, vs0 ; CHECK-BE-NEXT: addi r4, r4, .LCPI6_0@toc@l -; CHECK-BE-NEXT: lxv v4, 0(r4) +; CHECK-BE-NEXT: lxv vs1, 0(r4) ; CHECK-BE-NEXT: addis r4, r2, .LCPI6_1@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI6_1@toc@l -; CHECK-BE-NEXT: vperm v3, v3, v2, v4 -; CHECK-BE-NEXT: vextsb2w v3, v3 -; CHECK-BE-NEXT: xvcvsxwsp vs0, v3 ; CHECK-BE-NEXT: lxv v3, 0(r4) -; CHECK-BE-NEXT: vperm v2, v2, v2, v3 -; CHECK-BE-NEXT: stxv vs0, 16(r3) +; CHECK-BE-NEXT: vperm v3, v2, v2, v3 +; CHECK-BE-NEXT: xxperm v2, vs0, vs1 ; CHECK-BE-NEXT: vextsb2w v2, v2 +; CHECK-BE-NEXT: xvcvsxwsp vs0, v2 +; CHECK-BE-NEXT: stxv vs0, 16(r3) +; CHECK-BE-NEXT: vextsb2w v2, v3 ; CHECK-BE-NEXT: xvcvsxwsp vs1, v2 ; CHECK-BE-NEXT: stxv vs1, 0(r3) ; CHECK-BE-NEXT: blr Index: llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll =================================================================== --- llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll +++ llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll @@ -23,24 +23,24 @@ ; ; CHECK-P9-LABEL: test2elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtvsrwz v2, r3 +; CHECK-P9-NEXT: mtfprwz f1, r3 ; CHECK-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; CHECK-P9-NEXT: xxlxor v4, v4, v4 +; CHECK-P9-NEXT: xxlxor vs0, vs0, vs0 ; CHECK-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-P9-NEXT: lxv v3, 0(r3) -; CHECK-P9-NEXT: vperm v2, v4, v2, v3 -; CHECK-P9-NEXT: xvcvuxddp v2, v2 +; CHECK-P9-NEXT: lxv vs2, 0(r3) +; CHECK-P9-NEXT: xxperm vs1, vs0, vs2 +; CHECK-P9-NEXT: xvcvuxddp v2, vs1 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test2elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mtvsrwz v2, r3 +; CHECK-BE-NEXT: mtfprwz f1, r3 ; CHECK-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; CHECK-BE-NEXT: xxlxor v4, v4, v4 +; CHECK-BE-NEXT: xxlxor vs0, vs0, vs0 ; CHECK-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-BE-NEXT: lxv v3, 0(r3) -; CHECK-BE-NEXT: vperm v2, v4, v2, v3 -; CHECK-BE-NEXT: xvcvuxddp v2, v2 +; CHECK-BE-NEXT: lxv vs2, 0(r3) +; CHECK-BE-NEXT: xxperm vs1, vs0, vs2 +; CHECK-BE-NEXT: xvcvuxddp v2, vs1 ; CHECK-BE-NEXT: blr entry: %0 = bitcast i16 %a.coerce to <2 x i8> @@ -418,8 +418,8 @@ ; CHECK-P9-NEXT: mtvsrwz v2, r3 ; CHECK-P9-NEXT: addis r3, r2, .LCPI4_0@toc@ha ; CHECK-P9-NEXT: addi r3, r3, .LCPI4_0@toc@l -; CHECK-P9-NEXT: lxv v3, 0(r3) -; CHECK-P9-NEXT: vperm v2, v2, v2, v3 +; CHECK-P9-NEXT: lxv vs0, 0(r3) +; CHECK-P9-NEXT: xxperm v2, v2, vs0 ; CHECK-P9-NEXT: vextsb2d v2, v2 ; CHECK-P9-NEXT: xvcvsxddp v2, v2 ; CHECK-P9-NEXT: blr @@ -429,8 +429,8 @@ ; CHECK-BE-NEXT: mtvsrwz v2, r3 ; CHECK-BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha ; CHECK-BE-NEXT: addi r3, r3, .LCPI4_0@toc@l -; CHECK-BE-NEXT: lxv v3, 0(r3) -; CHECK-BE-NEXT: vperm v2, v2, v2, v3 +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: xxperm v2, v2, vs0 ; CHECK-BE-NEXT: vextsb2d v2, v2 ; CHECK-BE-NEXT: xvcvsxddp v2, v2 ; CHECK-BE-NEXT: blr @@ -492,18 +492,18 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mtvsrwz v2, r4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI5_0@toc@ha -; CHECK-BE-NEXT: xxlxor v3, v3, v3 +; CHECK-BE-NEXT: xxlxor vs0, vs0, vs0 ; CHECK-BE-NEXT: addi r4, r4, .LCPI5_0@toc@l -; CHECK-BE-NEXT: lxv v4, 0(r4) +; CHECK-BE-NEXT: lxv vs1, 0(r4) ; CHECK-BE-NEXT: addis r4, r2, .LCPI5_1@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI5_1@toc@l -; CHECK-BE-NEXT: vperm v3, v3, v2, v4 -; CHECK-BE-NEXT: vextsb2d v3, v3 -; CHECK-BE-NEXT: xvcvsxddp vs0, v3 ; CHECK-BE-NEXT: lxv v3, 0(r4) -; CHECK-BE-NEXT: vperm v2, v2, v2, v3 -; CHECK-BE-NEXT: stxv vs0, 16(r3) +; CHECK-BE-NEXT: vperm v3, v2, v2, v3 +; CHECK-BE-NEXT: xxperm v2, vs0, vs1 ; CHECK-BE-NEXT: vextsb2d v2, v2 +; CHECK-BE-NEXT: xvcvsxddp vs0, v2 +; CHECK-BE-NEXT: stxv vs0, 16(r3) +; CHECK-BE-NEXT: vextsb2d v2, v3 ; CHECK-BE-NEXT: xvcvsxddp vs1, v2 ; CHECK-BE-NEXT: stxv vs1, 0(r3) ; CHECK-BE-NEXT: blr Index: llvm/test/CodeGen/PowerPC/vec_extract_p9.ll =================================================================== --- llvm/test/CodeGen/PowerPC/vec_extract_p9.ll +++ llvm/test/CodeGen/PowerPC/vec_extract_p9.ll @@ -14,7 +14,6 @@ ; CHECK-BE-NEXT: vextublx 3, 5, 2 ; CHECK-BE-NEXT: clrldi 3, 3, 56 ; CHECK-BE-NEXT: blr - entry: %vecext = extractelement <16 x i8> %a, i32 %index ret i8 %vecext @@ -32,7 +31,6 @@ ; CHECK-BE-NEXT: vextublx 3, 5, 2 ; CHECK-BE-NEXT: extsb 3, 3 ; CHECK-BE-NEXT: blr - entry: %vecext = extractelement <16 x i8> %a, i32 %index ret i8 %vecext @@ -52,7 +50,6 @@ ; CHECK-BE-NEXT: vextuhlx 3, 3, 2 ; CHECK-BE-NEXT: clrldi 3, 3, 48 ; CHECK-BE-NEXT: blr - entry: %vecext = extractelement <8 x i16> %a, i32 %index ret i16 %vecext @@ -72,7 +69,6 @@ ; CHECK-BE-NEXT: vextuhlx 3, 3, 2 ; CHECK-BE-NEXT: extsh 3, 3 ; CHECK-BE-NEXT: blr - entry: %vecext = extractelement <8 x i16> %a, i32 %index ret i16 %vecext @@ -90,7 +86,6 @@ ; CHECK-BE-NEXT: rlwinm 3, 5, 2, 28, 29 ; CHECK-BE-NEXT: vextuwlx 3, 3, 2 ; CHECK-BE-NEXT: blr - entry: %vecext = extractelement <4 x i32> %a, i32 %index ret i32 %vecext @@ -110,7 +105,6 @@ ; CHECK-BE-NEXT: vextuwlx 3, 3, 2 ; CHECK-BE-NEXT: extsw 3, 3 ; CHECK-BE-NEXT: blr - entry: %vecext = extractelement <4 x i32> %a, i32 %index ret i32 %vecext @@ -131,7 +125,6 @@ ; CHECK-BE-NEXT: vextublx 3, 3, 2 ; CHECK-BE-NEXT: clrldi 3, 3, 56 ; CHECK-BE-NEXT: blr - entry: %vecext = extractelement <16 x i8> %a, i32 1 ret i8 %vecext @@ -151,7 +144,6 @@ ; CHECK-BE-NEXT: vextuhlx 3, 3, 2 ; CHECK-BE-NEXT: clrldi 3, 3, 48 ; CHECK-BE-NEXT: blr - entry: %vecext = extractelement <8 x i16> %a, i32 1 ret i16 %vecext @@ -169,7 +161,6 @@ ; CHECK-BE-NEXT: li 3, 12 ; CHECK-BE-NEXT: vextuwlx 3, 3, 2 ; CHECK-BE-NEXT: blr - entry: %vecext = extractelement <4 x i32> %a, i32 3 ret i32 %vecext @@ -180,11 +171,12 @@ ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: addis 3, 2, .LCPI9_0@toc@ha ; CHECK-LE-NEXT: addi 3, 3, .LCPI9_0@toc@l -; CHECK-LE-NEXT: lxv 36, 0(3) +; CHECK-LE-NEXT: lxv 0, 0(3) ; CHECK-LE-NEXT: addis 3, 2, .LCPI9_1@toc@ha -; CHECK-LE-NEXT: lfs 0, .LCPI9_1@toc@l(3) -; CHECK-LE-NEXT: vperm 2, 3, 2, 4 -; CHECK-LE-NEXT: xsadddp 1, 34, 0 +; CHECK-LE-NEXT: lfs 1, .LCPI9_1@toc@l(3) +; CHECK-LE-NEXT: xxperm 35, 34, 0 +; CHECK-LE-NEXT: xxswapd 0, 35 +; CHECK-LE-NEXT: xsadddp 1, 0, 1 ; CHECK-LE-NEXT: blr ; ; CHECK-BE-LABEL: test10: Index: llvm/test/CodeGen/PowerPC/vec_int_ext.ll =================================================================== --- llvm/test/CodeGen/PowerPC/vec_int_ext.ll +++ llvm/test/CodeGen/PowerPC/vec_int_ext.ll @@ -12,11 +12,10 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: addis 3, 2, .LCPI0_0@toc@ha ; CHECK-BE-NEXT: addi 3, 3, .LCPI0_0@toc@l -; CHECK-BE-NEXT: lxv 35, 0(3) -; CHECK-BE-NEXT: vperm 2, 2, 2, 3 +; CHECK-BE-NEXT: lxv 0, 0(3) +; CHECK-BE-NEXT: xxperm 34, 34, 0 ; CHECK-BE-NEXT: vextsb2w 2, 2 ; CHECK-BE-NEXT: blr - entry: %vecext = extractelement <16 x i8> %a, i32 0 %conv = sext i8 %vecext to i32 @@ -43,11 +42,10 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: addis 3, 2, .LCPI1_0@toc@ha ; CHECK-BE-NEXT: addi 3, 3, .LCPI1_0@toc@l -; CHECK-BE-NEXT: lxv 35, 0(3) -; CHECK-BE-NEXT: vperm 2, 2, 2, 3 +; CHECK-BE-NEXT: lxv 0, 0(3) +; CHECK-BE-NEXT: xxperm 34, 34, 0 ; CHECK-BE-NEXT: vextsb2d 2, 2 ; CHECK-BE-NEXT: blr - entry: %vecext = extractelement <16 x i8> %a, i32 0 %conv = sext i8 %vecext to i64 @@ -68,11 +66,10 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: addis 3, 2, .LCPI2_0@toc@ha ; CHECK-BE-NEXT: addi 3, 3, .LCPI2_0@toc@l -; CHECK-BE-NEXT: lxv 35, 0(3) -; CHECK-BE-NEXT: vperm 2, 2, 2, 3 +; CHECK-BE-NEXT: lxv 0, 0(3) +; CHECK-BE-NEXT: xxperm 34, 34, 0 ; CHECK-BE-NEXT: vextsh2w 2, 2 ; CHECK-BE-NEXT: blr - entry: %vecext = extractelement <8 x i16> %a, i32 0 %conv = sext i16 %vecext to i32 @@ -99,11 +96,10 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: addis 3, 2, .LCPI3_0@toc@ha ; CHECK-BE-NEXT: addi 3, 3, .LCPI3_0@toc@l -; CHECK-BE-NEXT: lxv 35, 0(3) -; CHECK-BE-NEXT: vperm 2, 2, 2, 3 +; CHECK-BE-NEXT: lxv 0, 0(3) +; CHECK-BE-NEXT: xxperm 34, 34, 0 ; CHECK-BE-NEXT: vextsh2d 2, 2 ; CHECK-BE-NEXT: blr - entry: %vecext = extractelement <8 x i16> %a, i32 0 %conv = sext i16 %vecext to i64 @@ -125,7 +121,6 @@ ; CHECK-BE-NEXT: vmrgew 2, 2, 2 ; CHECK-BE-NEXT: vextsw2d 2, 2 ; CHECK-BE-NEXT: blr - entry: %vecext = extractelement <4 x i32> %a, i32 0 %conv = sext i32 %vecext to i64 @@ -274,7 +269,6 @@ ; CHECK-BE-NEXT: extsw 4, 4 ; CHECK-BE-NEXT: mtvsrdd 34, 3, 4 ; CHECK-BE-NEXT: blr - entry: %vecext = extractelement <4 x i32> %a, i32 0 %conv = sext i32 %vecext to i64 @@ -353,24 +347,24 @@ ; CHECK-BE-NEXT: extsb 5, 5 ; CHECK-BE-NEXT: extsb 3, 3 ; CHECK-BE-NEXT: extsb 4, 4 -; CHECK-BE-NEXT: mtvsrwz 35, 9 +; CHECK-BE-NEXT: mtfprwz 0, 9 ; CHECK-BE-NEXT: addis 9, 2, .LCPI11_0@toc@ha ; CHECK-BE-NEXT: vextublx 6, 6, 2 ; CHECK-BE-NEXT: mtvsrwz 34, 10 -; CHECK-BE-NEXT: mtvsrwz 37, 7 +; CHECK-BE-NEXT: mtvsrwz 35, 8 ; CHECK-BE-NEXT: extsb 6, 6 -; CHECK-BE-NEXT: mtvsrwz 32, 3 +; CHECK-BE-NEXT: mtvsrwz 36, 4 ; CHECK-BE-NEXT: addi 9, 9, .LCPI11_0@toc@l -; CHECK-BE-NEXT: lxv 36, 0(9) -; CHECK-BE-NEXT: vperm 2, 3, 2, 4 -; CHECK-BE-NEXT: mtvsrwz 35, 8 -; CHECK-BE-NEXT: vperm 3, 5, 3, 4 -; CHECK-BE-NEXT: mtvsrwz 37, 5 +; CHECK-BE-NEXT: lxv 1, 0(9) +; CHECK-BE-NEXT: xxperm 34, 0, 1 +; CHECK-BE-NEXT: mtfprwz 0, 7 +; CHECK-BE-NEXT: xxperm 35, 0, 1 +; CHECK-BE-NEXT: mtfprwz 0, 5 ; CHECK-BE-NEXT: vmrghw 2, 3, 2 ; CHECK-BE-NEXT: mtvsrwz 35, 6 -; CHECK-BE-NEXT: vperm 3, 5, 3, 4 -; CHECK-BE-NEXT: mtvsrwz 37, 4 -; CHECK-BE-NEXT: vperm 4, 0, 5, 4 +; CHECK-BE-NEXT: xxperm 35, 0, 1 +; CHECK-BE-NEXT: mtfprwz 0, 3 +; CHECK-BE-NEXT: xxperm 36, 0, 1 ; CHECK-BE-NEXT: vmrghw 3, 4, 3 ; CHECK-BE-NEXT: xxmrghd 34, 35, 34 ; CHECK-BE-NEXT: blr Index: llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll =================================================================== --- llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll +++ llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll @@ -80,7 +80,6 @@ ; PC64LE9-NEXT: xscvspdpn 1, 1 ; PC64LE9-NEXT: xscvspdpn 2, 2 ; PC64LE9-NEXT: xscvspdpn 3, 3 -; PC64LE9-NEXT: lxv 36, 0(3) ; PC64LE9-NEXT: xsdivsp 0, 1, 0 ; PC64LE9-NEXT: xxswapd 1, 35 ; PC64LE9-NEXT: xscvspdpn 1, 1 @@ -88,11 +87,12 @@ ; PC64LE9-NEXT: xxsldwi 2, 35, 35, 3 ; PC64LE9-NEXT: xscvspdpn 2, 2 ; PC64LE9-NEXT: xsdivsp 2, 3, 2 +; PC64LE9-NEXT: xscvdpspn 0, 0 ; PC64LE9-NEXT: xscvdpspn 35, 1 +; PC64LE9-NEXT: lxv 1, 0(3) ; PC64LE9-NEXT: xscvdpspn 34, 2 ; PC64LE9-NEXT: vmrghw 2, 3, 2 -; PC64LE9-NEXT: xscvdpspn 35, 0 -; PC64LE9-NEXT: vperm 2, 3, 2, 4 +; PC64LE9-NEXT: xxperm 34, 0, 1 ; PC64LE9-NEXT: blr entry: %div = call <3 x float> @llvm.experimental.constrained.fdiv.v3f32( @@ -360,15 +360,15 @@ ; PC64LE9-NEXT: xscvdpspn 34, 1 ; PC64LE9-NEXT: xscvdpspn 35, 30 ; PC64LE9-NEXT: addis 3, 2, .LCPI7_0@toc@ha -; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload -; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: lfd 30, 64(1) # 8-byte Folded Reload +; PC64LE9-NEXT: xscvdpspn 0, 31 ; PC64LE9-NEXT: addi 3, 3, .LCPI7_0@toc@l -; PC64LE9-NEXT: lxv 36, 0(3) +; PC64LE9-NEXT: lxv 1, 0(3) ; PC64LE9-NEXT: vmrghw 2, 3, 2 -; PC64LE9-NEXT: xscvdpspn 35, 31 +; PC64LE9-NEXT: xxperm 34, 0, 1 +; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lfd 31, 72(1) # 8-byte Folded Reload -; PC64LE9-NEXT: vperm 2, 3, 2, 4 +; PC64LE9-NEXT: lfd 30, 64(1) # 8-byte Folded Reload ; PC64LE9-NEXT: addi 1, 1, 80 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -675,19 +675,19 @@ ; PC64LE9-NEXT: xscvspdpn 1, 1 ; PC64LE9-NEXT: xscvspdpn 2, 2 ; PC64LE9-NEXT: xscvspdpn 3, 3 -; PC64LE9-NEXT: lxv 36, 0(3) ; PC64LE9-NEXT: xsmulsp 0, 1, 0 ; PC64LE9-NEXT: xxswapd 1, 35 ; PC64LE9-NEXT: xscvspdpn 1, 1 ; PC64LE9-NEXT: xsmulsp 1, 2, 1 ; PC64LE9-NEXT: xxsldwi 2, 35, 35, 3 +; PC64LE9-NEXT: xscvdpspn 0, 0 ; PC64LE9-NEXT: xscvspdpn 2, 2 ; PC64LE9-NEXT: xsmulsp 2, 3, 2 ; PC64LE9-NEXT: xscvdpspn 35, 1 +; PC64LE9-NEXT: lxv 1, 0(3) ; PC64LE9-NEXT: xscvdpspn 34, 2 ; PC64LE9-NEXT: vmrghw 2, 3, 2 -; PC64LE9-NEXT: xscvdpspn 35, 0 -; PC64LE9-NEXT: vperm 2, 3, 2, 4 +; PC64LE9-NEXT: xxperm 34, 0, 1 ; PC64LE9-NEXT: blr entry: %mul = call <3 x float> @llvm.experimental.constrained.fmul.v3f32( @@ -836,19 +836,19 @@ ; PC64LE9-NEXT: xscvspdpn 1, 1 ; PC64LE9-NEXT: xscvspdpn 2, 2 ; PC64LE9-NEXT: xscvspdpn 3, 3 -; PC64LE9-NEXT: lxv 36, 0(3) ; PC64LE9-NEXT: xsaddsp 0, 1, 0 ; PC64LE9-NEXT: xxswapd 1, 35 ; PC64LE9-NEXT: xscvspdpn 1, 1 ; PC64LE9-NEXT: xsaddsp 1, 2, 1 ; PC64LE9-NEXT: xxsldwi 2, 35, 35, 3 +; PC64LE9-NEXT: xscvdpspn 0, 0 ; PC64LE9-NEXT: xscvspdpn 2, 2 ; PC64LE9-NEXT: xsaddsp 2, 3, 2 ; PC64LE9-NEXT: xscvdpspn 35, 1 +; PC64LE9-NEXT: lxv 1, 0(3) ; PC64LE9-NEXT: xscvdpspn 34, 2 ; PC64LE9-NEXT: vmrghw 2, 3, 2 -; PC64LE9-NEXT: xscvdpspn 35, 0 -; PC64LE9-NEXT: vperm 2, 3, 2, 4 +; PC64LE9-NEXT: xxperm 34, 0, 1 ; PC64LE9-NEXT: blr entry: %add = call <3 x float> @llvm.experimental.constrained.fadd.v3f32( @@ -997,19 +997,19 @@ ; PC64LE9-NEXT: xscvspdpn 1, 1 ; PC64LE9-NEXT: xscvspdpn 2, 2 ; PC64LE9-NEXT: xscvspdpn 3, 3 -; PC64LE9-NEXT: lxv 36, 0(3) ; PC64LE9-NEXT: xssubsp 0, 1, 0 ; PC64LE9-NEXT: xxswapd 1, 35 ; PC64LE9-NEXT: xscvspdpn 1, 1 ; PC64LE9-NEXT: xssubsp 1, 2, 1 ; PC64LE9-NEXT: xxsldwi 2, 35, 35, 3 +; PC64LE9-NEXT: xscvdpspn 0, 0 ; PC64LE9-NEXT: xscvspdpn 2, 2 ; PC64LE9-NEXT: xssubsp 2, 3, 2 ; PC64LE9-NEXT: xscvdpspn 35, 1 +; PC64LE9-NEXT: lxv 1, 0(3) ; PC64LE9-NEXT: xscvdpspn 34, 2 ; PC64LE9-NEXT: vmrghw 2, 3, 2 -; PC64LE9-NEXT: xscvdpspn 35, 0 -; PC64LE9-NEXT: vperm 2, 3, 2, 4 +; PC64LE9-NEXT: xxperm 34, 0, 1 ; PC64LE9-NEXT: blr entry: %sub = call <3 x float> @llvm.experimental.constrained.fsub.v3f32( @@ -1153,10 +1153,10 @@ ; PC64LE9-NEXT: xssqrtsp 0, 0 ; PC64LE9-NEXT: xscvdpspn 34, 2 ; PC64LE9-NEXT: xscvdpspn 35, 1 -; PC64LE9-NEXT: xscvdpspn 36, 0 +; PC64LE9-NEXT: xscvdpspn 0, 0 +; PC64LE9-NEXT: lxv 1, 0(3) ; PC64LE9-NEXT: vmrghw 2, 3, 2 -; PC64LE9-NEXT: lxv 35, 0(3) -; PC64LE9-NEXT: vperm 2, 4, 2, 3 +; PC64LE9-NEXT: xxperm 34, 0, 1 ; PC64LE9-NEXT: blr entry: %sqrt = call <3 x float> @llvm.experimental.constrained.sqrt.v3f32( @@ -1415,15 +1415,15 @@ ; PC64LE9-NEXT: xscvdpspn 34, 1 ; PC64LE9-NEXT: xscvdpspn 35, 30 ; PC64LE9-NEXT: addis 3, 2, .LCPI32_0@toc@ha -; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload -; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: lfd 30, 64(1) # 8-byte Folded Reload +; PC64LE9-NEXT: xscvdpspn 0, 31 ; PC64LE9-NEXT: addi 3, 3, .LCPI32_0@toc@l -; PC64LE9-NEXT: lxv 36, 0(3) +; PC64LE9-NEXT: lxv 1, 0(3) ; PC64LE9-NEXT: vmrghw 2, 3, 2 -; PC64LE9-NEXT: xscvdpspn 35, 31 +; PC64LE9-NEXT: xxperm 34, 0, 1 +; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lfd 31, 72(1) # 8-byte Folded Reload -; PC64LE9-NEXT: vperm 2, 3, 2, 4 +; PC64LE9-NEXT: lfd 30, 64(1) # 8-byte Folded Reload ; PC64LE9-NEXT: addi 1, 1, 80 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -1762,8 +1762,6 @@ } define <3 x float> @constrained_vector_powi_v3f32(<3 x float> %x, i32 %y) #0 { -; -; ; PC64LE-LABEL: constrained_vector_powi_v3f32: ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: mflr 0 @@ -1842,15 +1840,15 @@ ; PC64LE9-NEXT: xscvdpspn 34, 1 ; PC64LE9-NEXT: xscvdpspn 35, 30 ; PC64LE9-NEXT: addis 3, 2, .LCPI37_0@toc@ha -; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: lfd 30, 64(1) # 8-byte Folded Reload -; PC64LE9-NEXT: ld 30, 48(1) # 8-byte Folded Reload +; PC64LE9-NEXT: xscvdpspn 0, 31 ; PC64LE9-NEXT: addi 3, 3, .LCPI37_0@toc@l -; PC64LE9-NEXT: lxv 36, 0(3) +; PC64LE9-NEXT: lxv 1, 0(3) ; PC64LE9-NEXT: vmrghw 2, 3, 2 -; PC64LE9-NEXT: xscvdpspn 35, 31 +; PC64LE9-NEXT: xxperm 34, 0, 1 +; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lfd 31, 72(1) # 8-byte Folded Reload -; PC64LE9-NEXT: vperm 2, 3, 2, 4 +; PC64LE9-NEXT: lfd 30, 64(1) # 8-byte Folded Reload +; PC64LE9-NEXT: ld 30, 48(1) # 8-byte Folded Reload ; PC64LE9-NEXT: addi 1, 1, 80 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -2222,14 +2220,14 @@ ; PC64LE9-NEXT: xscvdpspn 34, 1 ; PC64LE9-NEXT: xscvdpspn 35, 30 ; PC64LE9-NEXT: addis 3, 2, .LCPI42_0@toc@ha -; PC64LE9-NEXT: xscvdpspn 36, 31 +; PC64LE9-NEXT: xscvdpspn 1, 31 +; PC64LE9-NEXT: addi 3, 3, .LCPI42_0@toc@l +; PC64LE9-NEXT: lxv 0, 0(3) +; PC64LE9-NEXT: vmrghw 2, 3, 2 +; PC64LE9-NEXT: xxperm 34, 1, 0 ; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload -; PC64LE9-NEXT: addi 3, 3, .LCPI42_0@toc@l ; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload -; PC64LE9-NEXT: vmrghw 2, 3, 2 -; PC64LE9-NEXT: lxv 35, 0(3) -; PC64LE9-NEXT: vperm 2, 4, 2, 3 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -2572,14 +2570,14 @@ ; PC64LE9-NEXT: xscvdpspn 34, 1 ; PC64LE9-NEXT: xscvdpspn 35, 30 ; PC64LE9-NEXT: addis 3, 2, .LCPI47_0@toc@ha -; PC64LE9-NEXT: xscvdpspn 36, 31 +; PC64LE9-NEXT: xscvdpspn 1, 31 +; PC64LE9-NEXT: addi 3, 3, .LCPI47_0@toc@l +; PC64LE9-NEXT: lxv 0, 0(3) +; PC64LE9-NEXT: vmrghw 2, 3, 2 +; PC64LE9-NEXT: xxperm 34, 1, 0 ; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload -; PC64LE9-NEXT: addi 3, 3, .LCPI47_0@toc@l ; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload -; PC64LE9-NEXT: vmrghw 2, 3, 2 -; PC64LE9-NEXT: lxv 35, 0(3) -; PC64LE9-NEXT: vperm 2, 4, 2, 3 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -2922,14 +2920,14 @@ ; PC64LE9-NEXT: xscvdpspn 34, 1 ; PC64LE9-NEXT: xscvdpspn 35, 30 ; PC64LE9-NEXT: addis 3, 2, .LCPI52_0@toc@ha -; PC64LE9-NEXT: xscvdpspn 36, 31 +; PC64LE9-NEXT: xscvdpspn 1, 31 +; PC64LE9-NEXT: addi 3, 3, .LCPI52_0@toc@l +; PC64LE9-NEXT: lxv 0, 0(3) +; PC64LE9-NEXT: vmrghw 2, 3, 2 +; PC64LE9-NEXT: xxperm 34, 1, 0 ; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload -; PC64LE9-NEXT: addi 3, 3, .LCPI52_0@toc@l ; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload -; PC64LE9-NEXT: vmrghw 2, 3, 2 -; PC64LE9-NEXT: lxv 35, 0(3) -; PC64LE9-NEXT: vperm 2, 4, 2, 3 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -3272,14 +3270,14 @@ ; PC64LE9-NEXT: xscvdpspn 34, 1 ; PC64LE9-NEXT: xscvdpspn 35, 30 ; PC64LE9-NEXT: addis 3, 2, .LCPI57_0@toc@ha -; PC64LE9-NEXT: xscvdpspn 36, 31 +; PC64LE9-NEXT: xscvdpspn 1, 31 +; PC64LE9-NEXT: addi 3, 3, .LCPI57_0@toc@l +; PC64LE9-NEXT: lxv 0, 0(3) +; PC64LE9-NEXT: vmrghw 2, 3, 2 +; PC64LE9-NEXT: xxperm 34, 1, 0 ; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload -; PC64LE9-NEXT: addi 3, 3, .LCPI57_0@toc@l ; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload -; PC64LE9-NEXT: vmrghw 2, 3, 2 -; PC64LE9-NEXT: lxv 35, 0(3) -; PC64LE9-NEXT: vperm 2, 4, 2, 3 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -3622,14 +3620,14 @@ ; PC64LE9-NEXT: xscvdpspn 34, 1 ; PC64LE9-NEXT: xscvdpspn 35, 30 ; PC64LE9-NEXT: addis 3, 2, .LCPI62_0@toc@ha -; PC64LE9-NEXT: xscvdpspn 36, 31 +; PC64LE9-NEXT: xscvdpspn 1, 31 +; PC64LE9-NEXT: addi 3, 3, .LCPI62_0@toc@l +; PC64LE9-NEXT: lxv 0, 0(3) +; PC64LE9-NEXT: vmrghw 2, 3, 2 +; PC64LE9-NEXT: xxperm 34, 1, 0 ; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload -; PC64LE9-NEXT: addi 3, 3, .LCPI62_0@toc@l ; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload -; PC64LE9-NEXT: vmrghw 2, 3, 2 -; PC64LE9-NEXT: lxv 35, 0(3) -; PC64LE9-NEXT: vperm 2, 4, 2, 3 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -3972,14 +3970,14 @@ ; PC64LE9-NEXT: xscvdpspn 34, 1 ; PC64LE9-NEXT: xscvdpspn 35, 30 ; PC64LE9-NEXT: addis 3, 2, .LCPI67_0@toc@ha -; PC64LE9-NEXT: xscvdpspn 36, 31 +; PC64LE9-NEXT: xscvdpspn 1, 31 +; PC64LE9-NEXT: addi 3, 3, .LCPI67_0@toc@l +; PC64LE9-NEXT: lxv 0, 0(3) +; PC64LE9-NEXT: vmrghw 2, 3, 2 +; PC64LE9-NEXT: xxperm 34, 1, 0 ; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload -; PC64LE9-NEXT: addi 3, 3, .LCPI67_0@toc@l ; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload -; PC64LE9-NEXT: vmrghw 2, 3, 2 -; PC64LE9-NEXT: lxv 35, 0(3) -; PC64LE9-NEXT: vperm 2, 4, 2, 3 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -4322,14 +4320,14 @@ ; PC64LE9-NEXT: xscvdpspn 34, 1 ; PC64LE9-NEXT: xscvdpspn 35, 30 ; PC64LE9-NEXT: addis 3, 2, .LCPI72_0@toc@ha -; PC64LE9-NEXT: xscvdpspn 36, 31 +; PC64LE9-NEXT: xscvdpspn 1, 31 +; PC64LE9-NEXT: addi 3, 3, .LCPI72_0@toc@l +; PC64LE9-NEXT: lxv 0, 0(3) +; PC64LE9-NEXT: vmrghw 2, 3, 2 +; PC64LE9-NEXT: xxperm 34, 1, 0 ; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload -; PC64LE9-NEXT: addi 3, 3, .LCPI72_0@toc@l ; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload -; PC64LE9-NEXT: vmrghw 2, 3, 2 -; PC64LE9-NEXT: lxv 35, 0(3) -; PC64LE9-NEXT: vperm 2, 4, 2, 3 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -4583,10 +4581,10 @@ ; PC64LE9-NEXT: xsrdpic 0, 0 ; PC64LE9-NEXT: xscvdpspn 34, 2 ; PC64LE9-NEXT: xscvdpspn 35, 1 -; PC64LE9-NEXT: xscvdpspn 36, 0 +; PC64LE9-NEXT: xscvdpspn 0, 0 +; PC64LE9-NEXT: lxv 1, 0(3) ; PC64LE9-NEXT: vmrghw 2, 3, 2 -; PC64LE9-NEXT: lxv 35, 0(3) -; PC64LE9-NEXT: vperm 2, 4, 2, 3 +; PC64LE9-NEXT: xxperm 34, 0, 1 ; PC64LE9-NEXT: blr entry: %rint = call <3 x float> @llvm.experimental.constrained.rint.v3f32( @@ -4810,14 +4808,14 @@ ; PC64LE9-NEXT: xscvdpspn 34, 1 ; PC64LE9-NEXT: xscvdpspn 35, 30 ; PC64LE9-NEXT: addis 3, 2, .LCPI82_0@toc@ha -; PC64LE9-NEXT: xscvdpspn 36, 31 +; PC64LE9-NEXT: xscvdpspn 1, 31 +; PC64LE9-NEXT: addi 3, 3, .LCPI82_0@toc@l +; PC64LE9-NEXT: lxv 0, 0(3) +; PC64LE9-NEXT: vmrghw 2, 3, 2 +; PC64LE9-NEXT: xxperm 34, 1, 0 ; PC64LE9-NEXT: lxv 63, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lfd 31, 56(1) # 8-byte Folded Reload -; PC64LE9-NEXT: addi 3, 3, .LCPI82_0@toc@l ; PC64LE9-NEXT: lfd 30, 48(1) # 8-byte Folded Reload -; PC64LE9-NEXT: vmrghw 2, 3, 2 -; PC64LE9-NEXT: lxv 35, 0(3) -; PC64LE9-NEXT: vperm 2, 4, 2, 3 ; PC64LE9-NEXT: addi 1, 1, 64 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -5134,15 +5132,15 @@ ; PC64LE9-NEXT: xscvdpspn 34, 1 ; PC64LE9-NEXT: xscvdpspn 35, 30 ; PC64LE9-NEXT: addis 3, 2, .LCPI87_0@toc@ha -; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload -; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: lfd 30, 64(1) # 8-byte Folded Reload +; PC64LE9-NEXT: xscvdpspn 0, 31 ; PC64LE9-NEXT: addi 3, 3, .LCPI87_0@toc@l -; PC64LE9-NEXT: lxv 36, 0(3) +; PC64LE9-NEXT: lxv 1, 0(3) ; PC64LE9-NEXT: vmrghw 2, 3, 2 -; PC64LE9-NEXT: xscvdpspn 35, 31 +; PC64LE9-NEXT: xxperm 34, 0, 1 +; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lfd 31, 72(1) # 8-byte Folded Reload -; PC64LE9-NEXT: vperm 2, 3, 2, 4 +; PC64LE9-NEXT: lfd 30, 64(1) # 8-byte Folded Reload ; PC64LE9-NEXT: addi 1, 1, 80 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -5375,15 +5373,15 @@ ; PC64LE9-NEXT: xscvdpspn 34, 1 ; PC64LE9-NEXT: xscvdpspn 35, 30 ; PC64LE9-NEXT: addis 3, 2, .LCPI92_0@toc@ha -; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload -; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: lfd 30, 64(1) # 8-byte Folded Reload +; PC64LE9-NEXT: xscvdpspn 0, 31 ; PC64LE9-NEXT: addi 3, 3, .LCPI92_0@toc@l -; PC64LE9-NEXT: lxv 36, 0(3) +; PC64LE9-NEXT: lxv 1, 0(3) ; PC64LE9-NEXT: vmrghw 2, 3, 2 -; PC64LE9-NEXT: xscvdpspn 35, 31 +; PC64LE9-NEXT: xxperm 34, 0, 1 +; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload +; PC64LE9-NEXT: lxv 62, 32(1) # 16-byte Folded Reload ; PC64LE9-NEXT: lfd 31, 72(1) # 8-byte Folded Reload -; PC64LE9-NEXT: vperm 2, 3, 2, 4 +; PC64LE9-NEXT: lfd 30, 64(1) # 8-byte Folded Reload ; PC64LE9-NEXT: addi 1, 1, 80 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 @@ -5564,25 +5562,26 @@ ; PC64LE9-LABEL: constrained_vector_fptosi_v3i32_v3f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: xxsldwi 0, 34, 34, 3 +; PC64LE9-NEXT: xxsldwi 1, 34, 34, 1 ; PC64LE9-NEXT: xscvspdpn 0, 0 +; PC64LE9-NEXT: xscvspdpn 1, 1 ; PC64LE9-NEXT: xscvdpsxws 0, 0 +; PC64LE9-NEXT: xscvdpsxws 1, 1 ; PC64LE9-NEXT: mffprwz 3, 0 ; PC64LE9-NEXT: xxswapd 0, 34 ; PC64LE9-NEXT: mtvsrwz 35, 3 ; PC64LE9-NEXT: xscvspdpn 0, 0 ; PC64LE9-NEXT: xscvdpsxws 0, 0 ; PC64LE9-NEXT: mffprwz 3, 0 -; PC64LE9-NEXT: xxsldwi 0, 34, 34, 1 ; PC64LE9-NEXT: mtvsrwz 36, 3 ; PC64LE9-NEXT: addis 3, 2, .LCPI97_0@toc@ha -; PC64LE9-NEXT: xscvspdpn 0, 0 ; PC64LE9-NEXT: addi 3, 3, .LCPI97_0@toc@l ; PC64LE9-NEXT: vmrghw 3, 4, 3 -; PC64LE9-NEXT: xscvdpsxws 0, 0 -; PC64LE9-NEXT: lxv 36, 0(3) -; PC64LE9-NEXT: mffprwz 3, 0 -; PC64LE9-NEXT: mtvsrwz 34, 3 -; PC64LE9-NEXT: vperm 2, 2, 3, 4 +; PC64LE9-NEXT: lxv 0, 0(3) +; PC64LE9-NEXT: mffprwz 3, 1 +; PC64LE9-NEXT: mtfprwz 1, 3 +; PC64LE9-NEXT: xxperm 35, 1, 0 +; PC64LE9-NEXT: vmr 2, 3 ; PC64LE9-NEXT: blr entry: %result = call <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f32( @@ -5828,19 +5827,19 @@ ; PC64LE9-LABEL: constrained_vector_fptosi_v3i32_v3f64: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: xscvdpsxws 0, 1 +; PC64LE9-NEXT: xscvdpsxws 1, 3 ; PC64LE9-NEXT: mffprwz 3, 0 ; PC64LE9-NEXT: xscvdpsxws 0, 2 ; PC64LE9-NEXT: mtvsrwz 34, 3 ; PC64LE9-NEXT: mffprwz 3, 0 -; PC64LE9-NEXT: xscvdpsxws 0, 3 ; PC64LE9-NEXT: mtvsrwz 35, 3 ; PC64LE9-NEXT: addis 3, 2, .LCPI105_0@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI105_0@toc@l ; PC64LE9-NEXT: vmrghw 2, 3, 2 -; PC64LE9-NEXT: lxv 35, 0(3) -; PC64LE9-NEXT: mffprwz 3, 0 -; PC64LE9-NEXT: mtvsrwz 36, 3 -; PC64LE9-NEXT: vperm 2, 4, 2, 3 +; PC64LE9-NEXT: lxv 0, 0(3) +; PC64LE9-NEXT: mffprwz 3, 1 +; PC64LE9-NEXT: mtfprwz 1, 3 +; PC64LE9-NEXT: xxperm 34, 1, 0 ; PC64LE9-NEXT: blr entry: %result = call <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f64( @@ -6058,25 +6057,26 @@ ; PC64LE9-LABEL: constrained_vector_fptoui_v3i32_v3f32: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: xxsldwi 0, 34, 34, 3 +; PC64LE9-NEXT: xxsldwi 1, 34, 34, 1 ; PC64LE9-NEXT: xscvspdpn 0, 0 +; PC64LE9-NEXT: xscvspdpn 1, 1 ; PC64LE9-NEXT: xscvdpuxws 0, 0 +; PC64LE9-NEXT: xscvdpuxws 1, 1 ; PC64LE9-NEXT: mffprwz 3, 0 ; PC64LE9-NEXT: xxswapd 0, 34 ; PC64LE9-NEXT: mtvsrwz 35, 3 ; PC64LE9-NEXT: xscvspdpn 0, 0 ; PC64LE9-NEXT: xscvdpuxws 0, 0 ; PC64LE9-NEXT: mffprwz 3, 0 -; PC64LE9-NEXT: xxsldwi 0, 34, 34, 1 ; PC64LE9-NEXT: mtvsrwz 36, 3 ; PC64LE9-NEXT: addis 3, 2, .LCPI113_0@toc@ha -; PC64LE9-NEXT: xscvspdpn 0, 0 ; PC64LE9-NEXT: addi 3, 3, .LCPI113_0@toc@l ; PC64LE9-NEXT: vmrghw 3, 4, 3 -; PC64LE9-NEXT: xscvdpuxws 0, 0 -; PC64LE9-NEXT: lxv 36, 0(3) -; PC64LE9-NEXT: mffprwz 3, 0 -; PC64LE9-NEXT: mtvsrwz 34, 3 -; PC64LE9-NEXT: vperm 2, 2, 3, 4 +; PC64LE9-NEXT: lxv 0, 0(3) +; PC64LE9-NEXT: mffprwz 3, 1 +; PC64LE9-NEXT: mtfprwz 1, 3 +; PC64LE9-NEXT: xxperm 35, 1, 0 +; PC64LE9-NEXT: vmr 2, 3 ; PC64LE9-NEXT: blr entry: %result = call <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f32( @@ -6321,19 +6321,19 @@ ; PC64LE9-LABEL: constrained_vector_fptoui_v3i32_v3f64: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: xscvdpuxws 0, 1 +; PC64LE9-NEXT: xscvdpuxws 1, 3 ; PC64LE9-NEXT: mffprwz 3, 0 ; PC64LE9-NEXT: xscvdpuxws 0, 2 ; PC64LE9-NEXT: mtvsrwz 34, 3 ; PC64LE9-NEXT: mffprwz 3, 0 -; PC64LE9-NEXT: xscvdpuxws 0, 3 ; PC64LE9-NEXT: mtvsrwz 35, 3 ; PC64LE9-NEXT: addis 3, 2, .LCPI121_0@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI121_0@toc@l ; PC64LE9-NEXT: vmrghw 2, 3, 2 -; PC64LE9-NEXT: lxv 35, 0(3) -; PC64LE9-NEXT: mffprwz 3, 0 -; PC64LE9-NEXT: mtvsrwz 36, 3 -; PC64LE9-NEXT: vperm 2, 4, 2, 3 +; PC64LE9-NEXT: lxv 0, 0(3) +; PC64LE9-NEXT: mffprwz 3, 1 +; PC64LE9-NEXT: mtfprwz 1, 3 +; PC64LE9-NEXT: xxperm 34, 1, 0 ; PC64LE9-NEXT: blr entry: %result = call <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f64( @@ -6536,12 +6536,12 @@ ; PC64LE9-NEXT: addi 3, 3, .LCPI129_0@toc@l ; PC64LE9-NEXT: xscvdpspn 34, 0 ; PC64LE9-NEXT: xsrsp 0, 2 +; PC64LE9-NEXT: xsrsp 1, 3 ; PC64LE9-NEXT: xscvdpspn 35, 0 -; PC64LE9-NEXT: xsrsp 0, 3 -; PC64LE9-NEXT: xscvdpspn 36, 0 +; PC64LE9-NEXT: xscvdpspn 1, 1 +; PC64LE9-NEXT: lxv 0, 0(3) ; PC64LE9-NEXT: vmrghw 2, 3, 2 -; PC64LE9-NEXT: lxv 35, 0(3) -; PC64LE9-NEXT: vperm 2, 4, 2, 3 +; PC64LE9-NEXT: xxperm 34, 1, 0 ; PC64LE9-NEXT: blr entry: %result = call <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64( @@ -6749,10 +6749,10 @@ ; PC64LE9-NEXT: xsrdpip 0, 0 ; PC64LE9-NEXT: xscvdpspn 34, 2 ; PC64LE9-NEXT: xscvdpspn 35, 1 -; PC64LE9-NEXT: xscvdpspn 36, 0 +; PC64LE9-NEXT: xscvdpspn 0, 0 +; PC64LE9-NEXT: lxv 1, 0(3) ; PC64LE9-NEXT: vmrghw 2, 3, 2 -; PC64LE9-NEXT: lxv 35, 0(3) -; PC64LE9-NEXT: vperm 2, 4, 2, 3 +; PC64LE9-NEXT: xxperm 34, 0, 1 ; PC64LE9-NEXT: blr entry: %ceil = call <3 x float> @llvm.experimental.constrained.ceil.v3f32( @@ -6864,10 +6864,10 @@ ; PC64LE9-NEXT: xsrdpim 0, 0 ; PC64LE9-NEXT: xscvdpspn 34, 2 ; PC64LE9-NEXT: xscvdpspn 35, 1 -; PC64LE9-NEXT: xscvdpspn 36, 0 +; PC64LE9-NEXT: xscvdpspn 0, 0 +; PC64LE9-NEXT: lxv 1, 0(3) ; PC64LE9-NEXT: vmrghw 2, 3, 2 -; PC64LE9-NEXT: lxv 35, 0(3) -; PC64LE9-NEXT: vperm 2, 4, 2, 3 +; PC64LE9-NEXT: xxperm 34, 0, 1 ; PC64LE9-NEXT: blr entry: %floor = call <3 x float> @llvm.experimental.constrained.floor.v3f32( @@ -6978,10 +6978,10 @@ ; PC64LE9-NEXT: xsrdpi 0, 0 ; PC64LE9-NEXT: xscvdpspn 34, 2 ; PC64LE9-NEXT: xscvdpspn 35, 1 -; PC64LE9-NEXT: xscvdpspn 36, 0 +; PC64LE9-NEXT: xscvdpspn 0, 0 +; PC64LE9-NEXT: lxv 1, 0(3) ; PC64LE9-NEXT: vmrghw 2, 3, 2 -; PC64LE9-NEXT: lxv 35, 0(3) -; PC64LE9-NEXT: vperm 2, 4, 2, 3 +; PC64LE9-NEXT: xxperm 34, 0, 1 ; PC64LE9-NEXT: blr entry: %round = call <3 x float> @llvm.experimental.constrained.round.v3f32( @@ -7093,10 +7093,10 @@ ; PC64LE9-NEXT: xsrdpiz 0, 0 ; PC64LE9-NEXT: xscvdpspn 34, 2 ; PC64LE9-NEXT: xscvdpspn 35, 1 -; PC64LE9-NEXT: xscvdpspn 36, 0 +; PC64LE9-NEXT: xscvdpspn 0, 0 +; PC64LE9-NEXT: lxv 1, 0(3) ; PC64LE9-NEXT: vmrghw 2, 3, 2 -; PC64LE9-NEXT: lxv 35, 0(3) -; PC64LE9-NEXT: vperm 2, 4, 2, 3 +; PC64LE9-NEXT: xxperm 34, 0, 1 ; PC64LE9-NEXT: blr entry: %trunc = call <3 x float> @llvm.experimental.constrained.trunc.v3f32( @@ -7236,8 +7236,8 @@ ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI155_0@toc@ha ; PC64LE9-NEXT: addi 3, 3, .LCPI155_0@toc@l -; PC64LE9-NEXT: lxv 35, 0(3) -; PC64LE9-NEXT: vperm 2, 2, 2, 3 +; PC64LE9-NEXT: lxv 0, 0(3) +; PC64LE9-NEXT: xxperm 34, 34, 0 ; PC64LE9-NEXT: vextsh2d 2, 2 ; PC64LE9-NEXT: xvcvsxddp 34, 34 ; PC64LE9-NEXT: blr @@ -7450,13 +7450,14 @@ ; PC64LE9-NEXT: xscvsxdsp 0, 0 ; PC64LE9-NEXT: addi 3, 3, .LCPI161_0@toc@l ; PC64LE9-NEXT: xscvdpspn 36, 0 -; PC64LE9-NEXT: vmrghw 3, 4, 3 -; PC64LE9-NEXT: lxv 36, 0(3) +; PC64LE9-NEXT: lxv 0, 0(3) ; PC64LE9-NEXT: mfvsrwz 3, 34 -; PC64LE9-NEXT: mtfprwa 0, 3 -; PC64LE9-NEXT: xscvsxdsp 0, 0 -; PC64LE9-NEXT: xscvdpspn 34, 0 -; PC64LE9-NEXT: vperm 2, 2, 3, 4 +; PC64LE9-NEXT: mtfprwa 1, 3 +; PC64LE9-NEXT: xscvsxdsp 1, 1 +; PC64LE9-NEXT: vmrghw 3, 4, 3 +; PC64LE9-NEXT: xscvdpspn 1, 1 +; PC64LE9-NEXT: xxperm 35, 1, 0 +; PC64LE9-NEXT: vmr 2, 3 ; PC64LE9-NEXT: blr entry: %result = call <3 x float> @@ -7516,19 +7517,19 @@ ; PC64LE9-LABEL: constrained_vector_sitofp_v3f32_v3i64: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mtfprd 0, 3 +; PC64LE9-NEXT: mtfprd 1, 5 ; PC64LE9-NEXT: addis 3, 2, .LCPI163_0@toc@ha ; PC64LE9-NEXT: xscvsxdsp 0, 0 +; PC64LE9-NEXT: xscvsxdsp 1, 1 ; PC64LE9-NEXT: addi 3, 3, .LCPI163_0@toc@l ; PC64LE9-NEXT: xscvdpspn 34, 0 ; PC64LE9-NEXT: mtfprd 0, 4 +; PC64LE9-NEXT: xscvdpspn 1, 1 ; PC64LE9-NEXT: xscvsxdsp 0, 0 ; PC64LE9-NEXT: xscvdpspn 35, 0 -; PC64LE9-NEXT: mtfprd 0, 5 -; PC64LE9-NEXT: xscvsxdsp 0, 0 +; PC64LE9-NEXT: lxv 0, 0(3) ; PC64LE9-NEXT: vmrghw 2, 3, 2 -; PC64LE9-NEXT: lxv 35, 0(3) -; PC64LE9-NEXT: xscvdpspn 36, 0 -; PC64LE9-NEXT: vperm 2, 4, 2, 3 +; PC64LE9-NEXT: xxperm 34, 1, 0 ; PC64LE9-NEXT: blr entry: %result = call <3 x float> @@ -7798,10 +7799,10 @@ ; PC64LE9-LABEL: constrained_vector_uitofp_v2f64_v2i16: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: addis 3, 2, .LCPI173_0@toc@ha -; PC64LE9-NEXT: xxlxor 36, 36, 36 +; PC64LE9-NEXT: xxlxor 0, 0, 0 ; PC64LE9-NEXT: addi 3, 3, .LCPI173_0@toc@l -; PC64LE9-NEXT: lxv 35, 0(3) -; PC64LE9-NEXT: vperm 2, 4, 2, 3 +; PC64LE9-NEXT: lxv 1, 0(3) +; PC64LE9-NEXT: xxperm 34, 0, 1 ; PC64LE9-NEXT: xvcvuxddp 34, 34 ; PC64LE9-NEXT: blr entry: @@ -8013,13 +8014,14 @@ ; PC64LE9-NEXT: xscvuxdsp 0, 0 ; PC64LE9-NEXT: addi 3, 3, .LCPI179_0@toc@l ; PC64LE9-NEXT: xscvdpspn 36, 0 -; PC64LE9-NEXT: vmrghw 3, 4, 3 -; PC64LE9-NEXT: lxv 36, 0(3) +; PC64LE9-NEXT: lxv 0, 0(3) ; PC64LE9-NEXT: mfvsrwz 3, 34 -; PC64LE9-NEXT: mtfprwz 0, 3 -; PC64LE9-NEXT: xscvuxdsp 0, 0 -; PC64LE9-NEXT: xscvdpspn 34, 0 -; PC64LE9-NEXT: vperm 2, 2, 3, 4 +; PC64LE9-NEXT: mtfprwz 1, 3 +; PC64LE9-NEXT: xscvuxdsp 1, 1 +; PC64LE9-NEXT: vmrghw 3, 4, 3 +; PC64LE9-NEXT: xscvdpspn 1, 1 +; PC64LE9-NEXT: xxperm 35, 1, 0 +; PC64LE9-NEXT: vmr 2, 3 ; PC64LE9-NEXT: blr entry: %result = call <3 x float> @@ -8079,19 +8081,19 @@ ; PC64LE9-LABEL: constrained_vector_uitofp_v3f32_v3i64: ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mtfprd 0, 3 +; PC64LE9-NEXT: mtfprd 1, 5 ; PC64LE9-NEXT: addis 3, 2, .LCPI181_0@toc@ha ; PC64LE9-NEXT: xscvuxdsp 0, 0 +; PC64LE9-NEXT: xscvuxdsp 1, 1 ; PC64LE9-NEXT: addi 3, 3, .LCPI181_0@toc@l ; PC64LE9-NEXT: xscvdpspn 34, 0 ; PC64LE9-NEXT: mtfprd 0, 4 +; PC64LE9-NEXT: xscvdpspn 1, 1 ; PC64LE9-NEXT: xscvuxdsp 0, 0 ; PC64LE9-NEXT: xscvdpspn 35, 0 -; PC64LE9-NEXT: mtfprd 0, 5 -; PC64LE9-NEXT: xscvuxdsp 0, 0 +; PC64LE9-NEXT: lxv 0, 0(3) ; PC64LE9-NEXT: vmrghw 2, 3, 2 -; PC64LE9-NEXT: lxv 35, 0(3) -; PC64LE9-NEXT: xscvdpspn 36, 0 -; PC64LE9-NEXT: vperm 2, 4, 2, 3 +; PC64LE9-NEXT: xxperm 34, 1, 0 ; PC64LE9-NEXT: blr entry: %result = call <3 x float> Index: llvm/test/CodeGen/PowerPC/xxperm-tests.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/xxperm-tests.ll @@ -0,0 +1,130 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \ +; RUN: -check-prefix=P9LE +; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s \ +; RUN: -check-prefix=P9BE +; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s \ +; RUN: -check-prefix=P8 + +define <4 x i32> @fromDiffMemConsDConvftoi(float* nocapture readonly %ptr) { +; P9LE-LABEL: fromDiffMemConsDConvftoi: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lxv vs0, 0(r3) +; P9LE-NEXT: addis r3, r2, .LCPI0_0@toc@ha +; P9LE-NEXT: addi r3, r3, .LCPI0_0@toc@l +; P9LE-NEXT: lxv vs1, 0(r3) +; P9LE-NEXT: xxperm vs0, vs0, vs1 +; P9LE-NEXT: xvcvspsxws v2, vs0 +; P9LE-NEXT: blr +; +; P9BE-LABEL: fromDiffMemConsDConvftoi: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lxv vs0, 0(r3) +; P9BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI0_0@toc@l +; P9BE-NEXT: lxv vs1, 0(r3) +; P9BE-NEXT: xxperm vs0, vs0, vs1 +; P9BE-NEXT: xvcvspsxws v2, vs0 +; P9BE-NEXT: blr +; +; P8-LABEL: fromDiffMemConsDConvftoi: +; P8: # %bb.0: # %entry +; P8-NEXT: addis r4, r2, .LCPI0_0@toc@ha +; P8-NEXT: lxvw4x v2, 0, r3 +; P8-NEXT: addi r4, r4, .LCPI0_0@toc@l +; P8-NEXT: lxvw4x v3, 0, r4 +; P8-NEXT: vperm v2, v2, v2, v3 +; P8-NEXT: xvcvspsxws v2, v2 +; P8-NEXT: blr +entry: + %arrayidx = getelementptr inbounds float, float* %ptr, i64 3 + %0 = load float, float* %arrayidx, align 4 + %conv = fptosi float %0 to i32 + %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0 + %arrayidx1 = getelementptr inbounds float, float* %ptr, i64 2 + %1 = load float, float* %arrayidx1, align 4 + %conv2 = fptosi float %1 to i32 + %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1 + %arrayidx4 = getelementptr inbounds float, float* %ptr, i64 1 + %2 = load float, float* %arrayidx4, align 4 + %conv5 = fptosi float %2 to i32 + %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2 + %3 = load float, float* %ptr, align 4 + %conv8 = fptosi float %3 to i32 + %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3 + ret <4 x i32> %vecinit9 +} + +define dso_local <16 x i8> @no_RAUW_in_combine_during_legalize(i32* nocapture readonly %ptr, i32 signext %offset) local_unnamed_addr #0 { +; P9LE-LABEL: no_RAUW_in_combine_during_legalize: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r4, 2 +; P9LE-NEXT: xxlxor v4, v4, v4 +; P9LE-NEXT: lxsiwzx v2, r3, r4 +; P9LE-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; P9LE-NEXT: addi r3, r3, .LCPI1_0@toc@l +; P9LE-NEXT: lxv v3, 0(r3) +; P9LE-NEXT: vperm v2, v4, v2, v3 +; P9LE-NEXT: blr +; +; P9BE-LABEL: no_RAUW_in_combine_during_legalize: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r4, 2 +; P9BE-NEXT: xxlxor v3, v3, v3 +; P9BE-NEXT: lxsiwzx v2, r3, r4 +; P9BE-NEXT: vmrghb v2, v2, v3 +; P9BE-NEXT: blr +; +; P8-LABEL: no_RAUW_in_combine_during_legalize: +; P8: # %bb.0: # %entry +; P8-NEXT: sldi r4, r4, 2 +; P8-NEXT: xxlxor v3, v3, v3 +; P8-NEXT: lxsiwzx v2, r3, r4 +; P8-NEXT: vmrghb v2, v2, v3 +; P8-NEXT: blr +entry: + %idx.ext = sext i32 %offset to i64 + %add.ptr = getelementptr inbounds i32, i32* %ptr, i64 %idx.ext + %0 = load i32, i32* %add.ptr, align 4 + %conv = zext i32 %0 to i64 + %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0 + %1 = bitcast <2 x i64> %splat.splatinsert to <16 x i8> + %shuffle = shufflevector <16 x i8> %1, <16 x i8> , <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_vrlqnm(<1 x i128> %a, <1 x i128> %b, <1 x i128> %c) { +; P9LE-LABEL: test_vrlqnm: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; P9LE-NEXT: vmr v2, v3 +; P9LE-NEXT: addi r3, r3, .LCPI2_0@toc@l +; P9LE-NEXT: lxv vs0, 0(r3) +; P9LE-NEXT: xxperm v2, v4, vs0 +; P9LE-NEXT: blr +; +; P9BE-LABEL: test_vrlqnm: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; P9BE-NEXT: vmr v2, v4 +; P9BE-NEXT: addi r3, r3, .LCPI2_0@toc@l +; P9BE-NEXT: lxv vs0, 0(r3) +; P9BE-NEXT: xxperm v2, v3, vs0 +; P9BE-NEXT: blr +; +; P8-LABEL: test_vrlqnm: +; P8: # %bb.0: # %entry +; P8-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; P8-NEXT: addi r3, r3, .LCPI2_0@toc@l +; P8-NEXT: lxvw4x v2, 0, r3 +; P8-NEXT: vperm v2, v3, v4, v2 +; P8-NEXT: blr +entry: + %0 = bitcast <1 x i128> %b to <16 x i8> + %1 = bitcast <1 x i128> %c to <16 x i8> + %shuffle.i = shufflevector <16 x i8> %0, <16 x i8> %1, <16 x i32> + ret <16 x i8> %shuffle.i +}