diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -9576,7 +9576,8 @@ // which is strictly wider than the loaded value by 8 bytes. So we need to // adjust the splat index to point to the correct address in memory. if (IsPermutedLoad) { - assert(isLittleEndian && "Unexpected permuted load on big endian target"); + assert((isLittleEndian || IsFourByte) && + "Unexpected permuted load on big endian target"); SplatIdx += IsFourByte ? 2 : 1; assert((SplatIdx < (IsFourByte ? 4 : 2)) && "Splat of a value outside of the loaded memory"); @@ -9591,6 +9592,11 @@ else Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8; + // If the width of the load is the same as the width of the splat, + // loading with an offset would load the wrong memory. + if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64)) + Offset = 0; + SDValue BasePtr = LD->getBasePtr(); if (Offset != 0) BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), @@ -14210,11 +14216,13 @@ static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl &ShuffV, int LHSMaxIdx, int RHSMinIdx, - int RHSMaxIdx, int HalfVec) { + int RHSMaxIdx, int HalfVec, + int ElemSize, + const PPCSubtarget &Subtarget) { for (int i = 0, e = ShuffV.size(); i < e; i++) { int Idx = ShuffV[i]; if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx)) - ShuffV[i] += HalfVec; + ShuffV[i] += Subtarget.isLittleEndian() ? HalfVec : HalfVec - ElemSize; } } @@ -14223,7 +14231,8 @@ // ( (scalar_to_vector (Ty (extract_elt %a, C)))) // In such a case, just change the shuffle mask to extract the element // from the permuted index. 
-static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG) { +static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, + const PPCSubtarget &Subtarget) { SDLoc dl(OrigSToV); EVT VT = OrigSToV.getValueType(); assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR && @@ -14238,7 +14247,9 @@ // for the input to the extract and the output of the scalar_to_vector. if (Idx && VT == OrigVector.getValueType()) { SmallVector NewMask(VT.getVectorNumElements(), -1); - NewMask[VT.getVectorNumElements() / 2] = Idx->getZExtValue(); + unsigned ResultInElt = VT.getVectorNumElements() / 2; + ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1; + NewMask[ResultInElt] = Idx->getZExtValue(); return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask); } } @@ -14263,9 +14274,9 @@ SDValue Res(SVN, 0); SDLoc dl(SVN); - // None of these combines are useful on big endian systems since the ISA - // already has a big endian bias. - if (!Subtarget.isLittleEndian() || !Subtarget.hasVSX()) + // Without direct moves, there isn't really any point in doing this because + // conversion between scalars and vectors for integers goes through the stack. + if (!Subtarget.hasDirectMove()) return Res; // If this is not a shuffle of a shuffle and the first element comes from @@ -14284,10 +14295,13 @@ SmallVector ShuffV(Mask.begin(), Mask.end()); SDValue SToVLHS = isScalarToVec(LHS); SDValue SToVRHS = isScalarToVec(RHS); + int ElemSize = 0; if (SToVLHS || SToVRHS) { int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements() : SToVRHS.getValueType().getVectorNumElements(); int NumEltsOut = ShuffV.size(); + ElemSize = SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() + : SToVRHS.getValueType().getScalarSizeInBits(); // Initially assume that neither input is permuted. These will be adjusted // accordingly if either input is. @@ -14298,18 +14312,25 @@ // Get the permuted scalar to vector nodes for the source(s) that come from // ISD::SCALAR_TO_VECTOR. 
+ // On big endian systems, this only makes sense for element sizes smaller + // than 64 bits since for 64-bit elements, all instructions already put + // the value into element zero. if (SToVLHS) { + if (!Subtarget.isLittleEndian() && ElemSize >= 64) + return Res; // Set up the values for the shuffle vector fixup. LHSMaxIdx = NumEltsOut / NumEltsIn; - SToVLHS = getSToVPermuted(SToVLHS, DAG); + SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget); if (SToVLHS.getValueType() != LHS.getValueType()) SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS); LHS = SToVLHS; } if (SToVRHS) { + if (!Subtarget.isLittleEndian() && ElemSize >= 64) + return Res; RHSMinIdx = NumEltsOut; RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx; - SToVRHS = getSToVPermuted(SToVRHS, DAG); + SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget); if (SToVRHS.getValueType() != RHS.getValueType()) SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS); RHS = SToVRHS; @@ -14322,7 +14343,7 @@ // entries in the range [RHSMinIdx,RHSMaxIdx) will be incremented by // HalfVec to refer to the corresponding element in the permuted vector. fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx, - HalfVec); + HalfVec, ElemSize / 8, Subtarget); Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV); // We may have simplified away the shuffle. We won't be able to do anything @@ -14332,12 +14353,13 @@ Mask = cast(Res)->getMask(); } + SDValue TheSplat = Subtarget.isLittleEndian() ? RHS : LHS; // The common case after we commuted the shuffle is that the RHS is a splat // and we have elements coming in from the splat at indices that are not // conducive to using a merge. 
// Example: // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, - if (!isSplatBV(RHS)) + if (!isSplatBV(TheSplat)) return Res; // We are looking for a mask such that all even elements are from @@ -14347,23 +14369,40 @@ // Adjust the mask so we are pulling in the same index from the splat // as the index from the interesting vector in consecutive elements. - // Example (even elements from first vector): - // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, - if (Mask[0] < NumElts) - for (int i = 1, e = Mask.size(); i < e; i += 2) - ShuffV[i] = (ShuffV[i - 1] + NumElts); - // Example (odd elements from first vector): - // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, - else - for (int i = 0, e = Mask.size(); i < e; i += 2) - ShuffV[i] = (ShuffV[i + 1] + NumElts); + if (Subtarget.isLittleEndian()) { + // Example (even elements from first vector): + // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, + if (Mask[0] < NumElts) + for (int i = 1, e = Mask.size(); i < e; i += 2) + ShuffV[i] = (ShuffV[i - 1] + NumElts); + // Example (odd elements from first vector): + // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, + else + for (int i = 0, e = Mask.size(); i < e; i += 2) + ShuffV[i] = (ShuffV[i + 1] + NumElts); + } else { + // Example (even elements from first vector): + // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> , t1 + if (Mask[0] < NumElts) + for (int i = 0, e = Mask.size(); i < e; i += 2) + ShuffV[i] = ShuffV[i + 1] - NumElts; + // Example (odd elements from first vector): + // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> , t1 + else + for (int i = 1, e = Mask.size(); i < e; i += 2) + ShuffV[i] = ShuffV[i - 1] - NumElts; + } // If the RHS has undefs, we need to remove them since we may have created // a shuffle that adds those instead of the splat value. 
- SDValue SplatVal = cast(RHS.getNode())->getSplatValue(); - RHS = DAG.getSplatBuildVector(RHS.getValueType(), dl, SplatVal); + SDValue SplatVal = + cast(TheSplat.getNode())->getSplatValue(); + TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal); - Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV); + if (Subtarget.isLittleEndian()) + Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, TheSplat, ShuffV); + else + Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, TheSplat, RHS, ShuffV); return Res; } diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -3066,6 +3066,8 @@ def : Pat<(store v4i32:$XT, xoaddr:$dst), (STXVW4X $XT, xoaddr:$dst)>; def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>; + def : Pat<(v2i64 (scalar_to_vector (i64 (load xoaddr:$src)))), + (SUBREG_TO_REG (i64 1), (XFLOADf64 xoaddr:$src), sub_64)>; } // HasVSX, HasOnlySwappingMemOps, IsBigEndian // Any Power8 VSX subtarget. 
@@ -3159,8 +3161,7 @@ (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>; // v4f32 scalar <-> vector conversions (BE) -def : Pat<(v4f32 (scalar_to_vector f32:$A)), - (v4f32 (XSCVDPSPN $A))>; +defm : ScalToVecWPermute; def : Pat<(f32 (vector_extract v4f32:$S, 0)), (f32 (XSCVSPDPN $S))>; def : Pat<(f32 (vector_extract v4f32:$S, 1)), @@ -3196,10 +3197,14 @@ (v2i64 (SUBREG_TO_REG (i64 1), (LIWAX xoaddr:$src), sub_64))>; def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))), (v2i64 (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$src), sub_64))>; -def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))), - (v4i32 (XXSLDWIs (LIWZX xoaddr:$src), 1))>; -def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))), - (v4f32 (XXSLDWIs (LIWZX xoaddr:$src), 1))>; +defm : ScalToVecWPermute< + v4i32, (i32 (load xoaddr:$src)), + (XXSLDWIs (LIWZX xoaddr:$src), 1), + (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$src), sub_64)>; +defm : ScalToVecWPermute< + v4f32, (f32 (load xoaddr:$src)), + (XXSLDWIs (LIWZX xoaddr:$src), 1), + (SUBREG_TO_REG (i64 1), (LIWZX xoaddr:$src), sub_64)>; def : Pat vector conversions (BE) -def : Pat<(v16i8 (scalar_to_vector i32:$A)), - (v16i8 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_BYTE_0, sub_64))>; -def : Pat<(v8i16 (scalar_to_vector i32:$A)), - (v8i16 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_HALF_0, sub_64))>; -def : Pat<(v4i32 (scalar_to_vector i32:$A)), - (v4i32 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_WORD_0, sub_64))>; +defm : ScalToVecWPermute< + v16i8, (i32 i32:$A), + (SUBREG_TO_REG (i64 1), MovesToVSR.BE_BYTE_0, sub_64), + (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64)>; +defm : ScalToVecWPermute< + v8i16, (i32 i32:$A), + (SUBREG_TO_REG (i64 1), MovesToVSR.BE_HALF_0, sub_64), + (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64)>; +defm : ScalToVecWPermute< + v4i32, (i32 i32:$A), + (SUBREG_TO_REG (i64 1), MovesToVSR.BE_WORD_0, sub_64), + (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64)>; def : Pat<(v2i64 (scalar_to_vector i64:$A)), (v2i64 
(SUBREG_TO_REG (i64 1), MovesToVSR.BE_DWORD_0, sub_64))>; diff --git a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll --- a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll +++ b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll @@ -1275,8 +1275,7 @@ ; P8BE-LABEL: spltMemVali: ; P8BE: # %bb.0: # %entry ; P8BE-NEXT: lfiwzx f0, 0, r3 -; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 -; P8BE-NEXT: xxspltw v2, vs0, 0 +; P8BE-NEXT: xxspltw v2, vs0, 1 ; P8BE-NEXT: blr ; ; P8LE-LABEL: spltMemVali: @@ -2793,8 +2792,7 @@ ; P8BE-LABEL: spltMemValui: ; P8BE: # %bb.0: # %entry ; P8BE-NEXT: lfiwzx f0, 0, r3 -; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 -; P8BE-NEXT: xxspltw v2, vs0, 0 +; P8BE-NEXT: xxspltw v2, vs0, 1 ; P8BE-NEXT: blr ; ; P8LE-LABEL: spltMemValui: diff --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll --- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll +++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll @@ -5,6 +5,9 @@ ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ ; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ ; RUN: FileCheck %s --check-prefix=CHECK-P9 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-BE ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ ; RUN: -mcpu=pwr8 -mattr=-vsx -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-NOVSX @@ -23,6 +26,11 @@ ; CHECK-P9-NEXT: vmrghb v2, v3, v2 ; CHECK-P9-NEXT: blr ; +; CHECK-BE-LABEL: testmrghb: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vmrglb v2, v2, v3 +; CHECK-BE-NEXT: blr +; ; CHECK-NOVSX-LABEL: testmrghb: ; CHECK-NOVSX: # %bb.0: # %entry ; CHECK-NOVSX-NEXT: vmrghb v2, v3, v2 @@ -47,6 +55,11 @@ ; CHECK-P9-NEXT: vmrghb v2, v2, v3 ; 
CHECK-P9-NEXT: blr ; +; CHECK-BE-LABEL: testmrghb2: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vmrglb v2, v3, v2 +; CHECK-BE-NEXT: blr +; ; CHECK-NOVSX-LABEL: testmrghb2: ; CHECK-NOVSX: # %bb.0: # %entry ; CHECK-NOVSX-NEXT: addis r3, r2, .LCPI1_0@toc@ha @@ -74,6 +87,11 @@ ; CHECK-P9-NEXT: vmrghh v2, v3, v2 ; CHECK-P9-NEXT: blr ; +; CHECK-BE-LABEL: testmrghh: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vmrglh v2, v2, v3 +; CHECK-BE-NEXT: blr +; ; CHECK-NOVSX-LABEL: testmrghh: ; CHECK-NOVSX: # %bb.0: # %entry ; CHECK-NOVSX-NEXT: vmrghh v2, v3, v2 @@ -98,6 +116,11 @@ ; CHECK-P9-NEXT: vmrghh v2, v2, v3 ; CHECK-P9-NEXT: blr ; +; CHECK-BE-LABEL: testmrghh2: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vmrglh v2, v3, v2 +; CHECK-BE-NEXT: blr +; ; CHECK-NOVSX-LABEL: testmrghh2: ; CHECK-NOVSX: # %bb.0: # %entry ; CHECK-NOVSX-NEXT: addis r3, r2, .LCPI3_0@toc@ha @@ -125,6 +148,11 @@ ; CHECK-P9-NEXT: vmrglb v2, v3, v2 ; CHECK-P9-NEXT: blr ; +; CHECK-BE-LABEL: testmrglb: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vmrghb v2, v2, v3 +; CHECK-BE-NEXT: blr +; ; CHECK-NOVSX-LABEL: testmrglb: ; CHECK-NOVSX: # %bb.0: # %entry ; CHECK-NOVSX-NEXT: vmrglb v2, v3, v2 @@ -149,6 +177,11 @@ ; CHECK-P9-NEXT: vmrglb v2, v2, v3 ; CHECK-P9-NEXT: blr ; +; CHECK-BE-LABEL: testmrglb2: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vmrghb v2, v3, v2 +; CHECK-BE-NEXT: blr +; ; CHECK-NOVSX-LABEL: testmrglb2: ; CHECK-NOVSX: # %bb.0: # %entry ; CHECK-NOVSX-NEXT: addis r3, r2, .LCPI5_0@toc@ha @@ -176,6 +209,11 @@ ; CHECK-P9-NEXT: vmrglh v2, v3, v2 ; CHECK-P9-NEXT: blr ; +; CHECK-BE-LABEL: testmrglh: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-NEXT: blr +; ; CHECK-NOVSX-LABEL: testmrglh: ; CHECK-NOVSX: # %bb.0: # %entry ; CHECK-NOVSX-NEXT: vmrglh v2, v3, v2 @@ -200,6 +238,11 @@ ; CHECK-P9-NEXT: vmrglh v2, v2, v3 ; CHECK-P9-NEXT: blr ; +; CHECK-BE-LABEL: testmrglh2: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-NEXT: 
blr +; ; CHECK-NOVSX-LABEL: testmrglh2: ; CHECK-NOVSX: # %bb.0: # %entry ; CHECK-NOVSX-NEXT: addis r3, r2, .LCPI7_0@toc@ha @@ -227,6 +270,11 @@ ; CHECK-P9-NEXT: vmrghw v2, v3, v2 ; CHECK-P9-NEXT: blr ; +; CHECK-BE-LABEL: testmrghw: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vmrglw v2, v2, v3 +; CHECK-BE-NEXT: blr +; ; CHECK-NOVSX-LABEL: testmrghw: ; CHECK-NOVSX: # %bb.0: # %entry ; CHECK-NOVSX-NEXT: vmrghw v2, v3, v2 @@ -251,6 +299,11 @@ ; CHECK-P9-NEXT: vmrghw v2, v2, v3 ; CHECK-P9-NEXT: blr ; +; CHECK-BE-LABEL: testmrghw2: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vmrglw v2, v3, v2 +; CHECK-BE-NEXT: blr +; ; CHECK-NOVSX-LABEL: testmrghw2: ; CHECK-NOVSX: # %bb.0: # %entry ; CHECK-NOVSX-NEXT: addis r3, r2, .LCPI9_0@toc@ha @@ -278,6 +331,11 @@ ; CHECK-P9-NEXT: vmrglw v2, v3, v2 ; CHECK-P9-NEXT: blr ; +; CHECK-BE-LABEL: testmrglw: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vmrghw v2, v2, v3 +; CHECK-BE-NEXT: blr +; ; CHECK-NOVSX-LABEL: testmrglw: ; CHECK-NOVSX: # %bb.0: # %entry ; CHECK-NOVSX-NEXT: vmrglw v2, v3, v2 @@ -302,6 +360,11 @@ ; CHECK-P9-NEXT: vmrglw v2, v2, v3 ; CHECK-P9-NEXT: blr ; +; CHECK-BE-LABEL: testmrglw2: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vmrghw v2, v3, v2 +; CHECK-BE-NEXT: blr +; ; CHECK-NOVSX-LABEL: testmrglw2: ; CHECK-NOVSX: # %bb.0: # %entry ; CHECK-NOVSX-NEXT: addis r3, r2, .LCPI11_0@toc@ha @@ -334,6 +397,16 @@ ; CHECK-P9-NEXT: vmrghb v2, v3, v2 ; CHECK-P9-NEXT: blr ; +; CHECK-BE-LABEL: testmrglb3: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxsd v2, 0(r3) +; CHECK-BE-NEXT: addis r3, r2, .LCPI12_0@toc@ha +; CHECK-BE-NEXT: xxlxor v4, v4, v4 +; CHECK-BE-NEXT: addi r3, r3, .LCPI12_0@toc@l +; CHECK-BE-NEXT: lxvx v3, 0, r3 +; CHECK-BE-NEXT: vperm v2, v4, v2, v3 +; CHECK-BE-NEXT: blr +; ; CHECK-NOVSX-LABEL: testmrglb3: ; CHECK-NOVSX: # %bb.0: # %entry ; CHECK-NOVSX-NEXT: vxor v2, v2, v2 @@ -389,6 +462,20 @@ ; CHECK-P9-NEXT: xxmrghd vs0, vs1, vs0 ; CHECK-P9-NEXT: stxv vs0, 0(r30) ; +; CHECK-BE-LABEL: 
no_crash_elt0_from_RHS: +; CHECK-BE: # %bb.0: # %test_entry +; CHECK-BE-NEXT: mflr r0 +; CHECK-BE-NEXT: std r0, 16(r1) +; CHECK-BE-NEXT: stdu r1, -128(r1) +; CHECK-BE-NEXT: std r30, 112(r1) # 8-byte Folded Spill +; CHECK-BE-NEXT: mr r30, r3 +; CHECK-BE-NEXT: bl dummy +; CHECK-BE-NEXT: nop +; CHECK-BE-NEXT: xxlxor f0, f0, f0 +; CHECK-BE-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; CHECK-BE-NEXT: xxmrghd vs0, vs0, vs1 +; CHECK-BE-NEXT: stxv vs0, 0(r30) +; ; CHECK-NOVSX-LABEL: no_crash_elt0_from_RHS: ; CHECK-NOVSX: # %bb.0: # %test_entry ; CHECK-NOVSX-NEXT: mflr r0 @@ -438,6 +525,12 @@ ; CHECK-P9-NEXT: mtvsrws v2, r3 ; CHECK-P9-NEXT: blr ; +; CHECK-BE-LABEL: no_crash_bitcast: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: mtvsrws v2, r3 +; CHECK-BE-NEXT: vmrghw v2, v2, v2 +; CHECK-BE-NEXT: blr +; ; CHECK-NOVSX-LABEL: no_crash_bitcast: ; CHECK-NOVSX: # %bb.0: # %entry ; CHECK-NOVSX-NEXT: addis r4, r2, .LCPI14_0@toc@ha @@ -482,6 +575,17 @@ ; CHECK-P9-NEXT: vmrgow v2, v3, v2 ; CHECK-P9-NEXT: blr ; +; CHECK-BE-LABEL: replace_undefs_in_splat: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: addis r3, r2, .LCPI15_0@toc@ha +; CHECK-BE-NEXT: addi r3, r3, .LCPI15_0@toc@l +; CHECK-BE-NEXT: lxvx v3, 0, r3 +; CHECK-BE-NEXT: addis r3, r2, .LCPI15_1@toc@ha +; CHECK-BE-NEXT: addi r3, r3, .LCPI15_1@toc@l +; CHECK-BE-NEXT: lxvx v4, 0, r3 +; CHECK-BE-NEXT: vperm v2, v2, v4, v3 +; CHECK-BE-NEXT: blr +; ; CHECK-NOVSX-LABEL: replace_undefs_in_splat: ; CHECK-NOVSX: # %bb.0: # %entry ; CHECK-NOVSX-NEXT: addis r3, r2, .LCPI15_0@toc@ha @@ -531,6 +635,14 @@ ; CHECK-P9-NEXT: vperm v2, v4, v2, v3 ; CHECK-P9-NEXT: blr ; +; CHECK-BE-LABEL: no_RAUW_in_combine_during_legalize: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: sldi r4, r4, 2 +; CHECK-BE-NEXT: xxlxor v3, v3, v3 +; CHECK-BE-NEXT: lxsiwzx v2, r3, r4 +; CHECK-BE-NEXT: vmrghb v2, v2, v3 +; CHECK-BE-NEXT: blr +; ; CHECK-NOVSX-LABEL: no_RAUW_in_combine_during_legalize: ; CHECK-NOVSX: # %bb.0: # %entry ; CHECK-NOVSX-NEXT: sldi r4, r4, 2 @@ 
-577,6 +689,12 @@ ; CHECK-P9-NEXT: lxvwsx v2, 0, r3 ; CHECK-P9-NEXT: blr ; +; CHECK-BE-LABEL: testSplat4Low: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: addi r3, r3, 4 +; CHECK-BE-NEXT: lxvwsx v2, 0, r3 +; CHECK-BE-NEXT: blr +; ; CHECK-NOVSX-LABEL: testSplat4Low: ; CHECK-NOVSX: # %bb.0: # %entry ; CHECK-NOVSX-NEXT: ld r3, 0(r3) @@ -611,6 +729,11 @@ ; CHECK-P9-NEXT: lxvwsx v2, 0, r3 ; CHECK-P9-NEXT: blr ; +; CHECK-BE-LABEL: testSplat4hi: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxvwsx v2, 0, r3 +; CHECK-BE-NEXT: blr +; ; CHECK-NOVSX-LABEL: testSplat4hi: ; CHECK-NOVSX: # %bb.0: # %entry ; CHECK-NOVSX-NEXT: ld r3, 0(r3) @@ -644,6 +767,11 @@ ; CHECK-P9-NEXT: lxvdsx v2, 0, r3 ; CHECK-P9-NEXT: blr ; +; CHECK-BE-LABEL: testSplat8: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lxvdsx v2, 0, r3 +; CHECK-BE-NEXT: blr +; ; CHECK-NOVSX-LABEL: testSplat8: ; CHECK-NOVSX: # %bb.0: # %entry ; CHECK-NOVSX-NEXT: ld r3, 0(r3) diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll --- a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll +++ b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll @@ -482,7 +482,7 @@ ; P8-NEXT: xsrsp f0, f0 ; P8-NEXT: xscvdpspn v2, f1 ; P8-NEXT: xscvdpspn v3, f0 -; P8-NEXT: vmrghw v2, v2, v3 +; P8-NEXT: vmrgow v2, v2, v3 ; P8-NEXT: blr ; ; P9-LABEL: fptrunc_v2f32_v2f64: diff --git a/llvm/test/CodeGen/PowerPC/load-and-splat.ll b/llvm/test/CodeGen/PowerPC/load-and-splat.ll --- a/llvm/test/CodeGen/PowerPC/load-and-splat.ll +++ b/llvm/test/CodeGen/PowerPC/load-and-splat.ll @@ -2,9 +2,16 @@ ; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ ; RUN: -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s \ ; RUN: -check-prefix=P9 -; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ ; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \ +; RUN: -check-prefix=P9LE +; RUN: llc -mcpu=pwr8 
-ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s \ ; RUN: -check-prefix=P8 +; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \ +; RUN: -check-prefix=P8LE + define dso_local void @test(<2 x double>* nocapture %c, double* nocapture readonly %a) local_unnamed_addr { ; P9-LABEL: test: ; P9: # %bb.0: # %entry @@ -13,12 +20,26 @@ ; P9-NEXT: stxv vs0, 0(r3) ; P9-NEXT: blr ; +; P9LE-LABEL: test: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addi r4, r4, 24 +; P9LE-NEXT: lxvdsx vs0, 0, r4 +; P9LE-NEXT: stxv vs0, 0(r3) +; P9LE-NEXT: blr +; ; P8-LABEL: test: ; P8: # %bb.0: # %entry ; P8-NEXT: addi r4, r4, 24 ; P8-NEXT: lxvdsx vs0, 0, r4 ; P8-NEXT: stxvd2x vs0, 0, r3 ; P8-NEXT: blr +; +; P8LE-LABEL: test: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addi r4, r4, 24 +; P8LE-NEXT: lxvdsx vs0, 0, r4 +; P8LE-NEXT: stxvd2x vs0, 0, r3 +; P8LE-NEXT: blr entry: %arrayidx = getelementptr inbounds double, double* %a, i64 3 %0 = load double, double* %arrayidx, align 8 @@ -36,13 +57,28 @@ ; P9-NEXT: stxv vs0, 0(r3) ; P9-NEXT: blr ; +; P9LE-LABEL: test2: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addi r4, r4, 12 +; P9LE-NEXT: lxvwsx vs0, 0, r4 +; P9LE-NEXT: stxv vs0, 0(r3) +; P9LE-NEXT: blr +; ; P8-LABEL: test2: ; P8: # %bb.0: # %entry ; P8-NEXT: addi r4, r4, 12 ; P8-NEXT: lfiwzx f0, 0, r4 -; P8-NEXT: xxspltw v2, vs0, 1 -; P8-NEXT: stvx v2, 0, r3 +; P8-NEXT: xxspltw vs0, vs0, 1 +; P8-NEXT: stxvw4x vs0, 0, r3 ; P8-NEXT: blr +; +; P8LE-LABEL: test2: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addi r4, r4, 12 +; P8LE-NEXT: lfiwzx f0, 0, r4 +; P8LE-NEXT: xxspltw v2, vs0, 1 +; P8LE-NEXT: stvx v2, 0, r3 +; P8LE-NEXT: blr entry: %arrayidx = getelementptr inbounds float, float* %a, i64 3 %0 = load float, float* %arrayidx, align 4 @@ -60,13 +96,28 @@ ; P9-NEXT: stxv vs0, 0(r3) ; P9-NEXT: blr ; +; P9LE-LABEL: test3: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addi r4, r4, 12 
+; P9LE-NEXT: lxvwsx vs0, 0, r4 +; P9LE-NEXT: stxv vs0, 0(r3) +; P9LE-NEXT: blr +; ; P8-LABEL: test3: ; P8: # %bb.0: # %entry ; P8-NEXT: addi r4, r4, 12 ; P8-NEXT: lfiwzx f0, 0, r4 -; P8-NEXT: xxspltw v2, vs0, 1 -; P8-NEXT: stvx v2, 0, r3 +; P8-NEXT: xxspltw vs0, vs0, 1 +; P8-NEXT: stxvw4x vs0, 0, r3 ; P8-NEXT: blr +; +; P8LE-LABEL: test3: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addi r4, r4, 12 +; P8LE-NEXT: lfiwzx f0, 0, r4 +; P8LE-NEXT: xxspltw v2, vs0, 1 +; P8LE-NEXT: stvx v2, 0, r3 +; P8LE-NEXT: blr entry: %arrayidx = getelementptr inbounds i32, i32* %a, i64 3 %0 = load i32, i32* %arrayidx, align 4 @@ -84,12 +135,26 @@ ; P9-NEXT: stxv vs0, 0(r3) ; P9-NEXT: blr ; +; P9LE-LABEL: test4: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addi r4, r4, 24 +; P9LE-NEXT: lxvdsx vs0, 0, r4 +; P9LE-NEXT: stxv vs0, 0(r3) +; P9LE-NEXT: blr +; ; P8-LABEL: test4: ; P8: # %bb.0: # %entry ; P8-NEXT: addi r4, r4, 24 ; P8-NEXT: lxvdsx vs0, 0, r4 ; P8-NEXT: stxvd2x vs0, 0, r3 ; P8-NEXT: blr +; +; P8LE-LABEL: test4: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addi r4, r4, 24 +; P8LE-NEXT: lxvdsx vs0, 0, r4 +; P8LE-NEXT: stxvd2x vs0, 0, r3 +; P8LE-NEXT: blr entry: %arrayidx = getelementptr inbounds i64, i64* %a, i64 3 %0 = load i64, i64* %arrayidx, align 8 @@ -105,11 +170,22 @@ ; P9-NEXT: lxvwsx v2, 0, r3 ; P9-NEXT: blr ; +; P9LE-LABEL: unadjusted_lxvwsx: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lxvwsx v2, 0, r3 +; P9LE-NEXT: blr +; ; P8-LABEL: unadjusted_lxvwsx: ; P8: # %bb.0: # %entry ; P8-NEXT: lfiwzx f0, 0, r3 ; P8-NEXT: xxspltw v2, vs0, 1 ; P8-NEXT: blr +; +; P8LE-LABEL: unadjusted_lxvwsx: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfiwzx f0, 0, r3 +; P8LE-NEXT: xxspltw v2, vs0, 1 +; P8LE-NEXT: blr entry: %0 = bitcast i32* %s to <4 x i8>* %1 = load <4 x i8>, <4 x i8>* %0, align 4 @@ -124,11 +200,23 @@ ; P9-NEXT: lxvwsx v2, 0, r3 ; P9-NEXT: blr ; +; P9LE-LABEL: adjusted_lxvwsx: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addi r3, r3, 4 +; P9LE-NEXT: lxvwsx v2, 0, r3 +; P9LE-NEXT: blr +; ; 
P8-LABEL: adjusted_lxvwsx: ; P8: # %bb.0: # %entry ; P8-NEXT: lfdx f0, 0, r3 -; P8-NEXT: xxspltw v2, vs0, 0 +; P8-NEXT: xxspltw v2, vs0, 1 ; P8-NEXT: blr +; +; P8LE-LABEL: adjusted_lxvwsx: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfdx f0, 0, r3 +; P8LE-NEXT: xxspltw v2, vs0, 0 +; P8LE-NEXT: blr entry: %0 = bitcast i64* %s to <8 x i8>* %1 = load <8 x i8>, <8 x i8>* %0, align 8 @@ -142,11 +230,22 @@ ; P9-NEXT: lxvwsx v2, 0, r3 ; P9-NEXT: blr ; +; P9LE-LABEL: unadjusted_lxvwsx_v16i8: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lxvwsx v2, 0, r3 +; P9LE-NEXT: blr +; ; P8-LABEL: unadjusted_lxvwsx_v16i8: ; P8: # %bb.0: # %entry -; P8-NEXT: lvx v2, 0, r3 -; P8-NEXT: xxspltw v2, v2, 3 +; P8-NEXT: lxvw4x vs0, 0, r3 +; P8-NEXT: xxspltw v2, vs0, 0 ; P8-NEXT: blr +; +; P8LE-LABEL: unadjusted_lxvwsx_v16i8: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lvx v2, 0, r3 +; P8LE-NEXT: xxspltw v2, v2, 3 +; P8LE-NEXT: blr entry: %0 = load <16 x i8>, <16 x i8>* %s, align 16 %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> @@ -160,11 +259,23 @@ ; P9-NEXT: lxvwsx v2, 0, r3 ; P9-NEXT: blr ; +; P9LE-LABEL: adjusted_lxvwsx_v16i8: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addi r3, r3, 4 +; P9LE-NEXT: lxvwsx v2, 0, r3 +; P9LE-NEXT: blr +; ; P8-LABEL: adjusted_lxvwsx_v16i8: ; P8: # %bb.0: # %entry -; P8-NEXT: lvx v2, 0, r3 -; P8-NEXT: xxspltw v2, v2, 2 +; P8-NEXT: lxvw4x vs0, 0, r3 +; P8-NEXT: xxspltw v2, vs0, 1 ; P8-NEXT: blr +; +; P8LE-LABEL: adjusted_lxvwsx_v16i8: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lvx v2, 0, r3 +; P8LE-NEXT: xxspltw v2, v2, 2 +; P8LE-NEXT: blr entry: %0 = load <16 x i8>, <16 x i8>* %s, align 16 %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> @@ -178,11 +289,23 @@ ; P9-NEXT: lxvwsx v2, 0, r3 ; P9-NEXT: blr ; +; P9LE-LABEL: adjusted_lxvwsx_v16i8_2: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addi r3, r3, 8 +; P9LE-NEXT: lxvwsx v2, 0, r3 +; P9LE-NEXT: blr +; ; P8-LABEL: adjusted_lxvwsx_v16i8_2: ; P8: # %bb.0: # %entry -; P8-NEXT: lvx v2, 0, r3 -; P8-NEXT: 
xxspltw v2, v2, 1 +; P8-NEXT: lxvw4x vs0, 0, r3 +; P8-NEXT: xxspltw v2, vs0, 2 ; P8-NEXT: blr +; +; P8LE-LABEL: adjusted_lxvwsx_v16i8_2: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lvx v2, 0, r3 +; P8LE-NEXT: xxspltw v2, v2, 1 +; P8LE-NEXT: blr entry: %0 = load <16 x i8>, <16 x i8>* %s, align 16 %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> @@ -196,11 +319,23 @@ ; P9-NEXT: lxvwsx v2, 0, r3 ; P9-NEXT: blr ; +; P9LE-LABEL: adjusted_lxvwsx_v16i8_3: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addi r3, r3, 12 +; P9LE-NEXT: lxvwsx v2, 0, r3 +; P9LE-NEXT: blr +; ; P8-LABEL: adjusted_lxvwsx_v16i8_3: ; P8: # %bb.0: # %entry -; P8-NEXT: lvx v2, 0, r3 -; P8-NEXT: xxspltw v2, v2, 0 +; P8-NEXT: lxvw4x vs0, 0, r3 +; P8-NEXT: xxspltw v2, vs0, 3 ; P8-NEXT: blr +; +; P8LE-LABEL: adjusted_lxvwsx_v16i8_3: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lvx v2, 0, r3 +; P8LE-NEXT: xxspltw v2, v2, 0 +; P8LE-NEXT: blr entry: %0 = load <16 x i8>, <16 x i8>* %s, align 16 %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> @@ -213,10 +348,20 @@ ; P9-NEXT: lxvdsx v2, 0, r3 ; P9-NEXT: blr ; +; P9LE-LABEL: unadjusted_lxvdsx: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lxvdsx v2, 0, r3 +; P9LE-NEXT: blr +; ; P8-LABEL: unadjusted_lxvdsx: ; P8: # %bb.0: # %entry ; P8-NEXT: lxvdsx v2, 0, r3 ; P8-NEXT: blr +; +; P8LE-LABEL: unadjusted_lxvdsx: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lxvdsx v2, 0, r3 +; P8LE-NEXT: blr entry: %0 = bitcast i64* %s to <8 x i8>* %1 = load <8 x i8>, <8 x i8>* %0, align 8 @@ -230,10 +375,20 @@ ; P9-NEXT: lxvdsx v2, 0, r3 ; P9-NEXT: blr ; +; P9LE-LABEL: unadjusted_lxvdsx_v16i8: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lxvdsx v2, 0, r3 +; P9LE-NEXT: blr +; ; P8-LABEL: unadjusted_lxvdsx_v16i8: ; P8: # %bb.0: # %entry ; P8-NEXT: lxvdsx v2, 0, r3 ; P8-NEXT: blr +; +; P8LE-LABEL: unadjusted_lxvdsx_v16i8: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lxvdsx v2, 0, r3 +; P8LE-NEXT: blr entry: %0 = load <16 x i8>, <16 x i8>* %s, align 16 %1 = shufflevector <16 x i8> %0, <16 x 
i8> undef, <16 x i32> @@ -247,11 +402,23 @@ ; P9-NEXT: lxvdsx v2, 0, r3 ; P9-NEXT: blr ; +; P9LE-LABEL: adjusted_lxvdsx_v16i8: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addi r3, r3, 8 +; P9LE-NEXT: lxvdsx v2, 0, r3 +; P9LE-NEXT: blr +; ; P8-LABEL: adjusted_lxvdsx_v16i8: ; P8: # %bb.0: # %entry ; P8-NEXT: addi r3, r3, 8 ; P8-NEXT: lxvdsx v2, 0, r3 ; P8-NEXT: blr +; +; P8LE-LABEL: adjusted_lxvdsx_v16i8: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addi r3, r3, 8 +; P8LE-NEXT: lxvdsx v2, 0, r3 +; P8LE-NEXT: blr entry: %0 = load <16 x i8>, <16 x i8>* %s, align 16 %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> diff --git a/llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll b/llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll --- a/llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll +++ b/llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s \ ; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names | FileCheck --check-prefix=CHECK-LE \ ; RUN: -implicit-check-not vmrg -implicit-check-not=vperm %s @@ -11,13 +12,13 @@ ; CHECK-LE-NEXT: lfiwzx f0, 0, r3 ; CHECK-LE-NEXT: xxspltw v2, vs0, 1 ; CHECK-LE-NEXT: blr - +; ; CHECK-LABEL: test: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lfiwzx f0, 0, r3 -; CHECK-NEXT: xxsldwi vs0, f0, f0, 1 -; CHECK-NEXT: xxspltw v2, vs0, 0 +; CHECK-NEXT: xxspltw v2, vs0, 1 ; CHECK-NEXT: blr + entry: %0 = bitcast i32* %s to <4 x i8>* %1 = load <4 x i8>, <4 x i8>* %0, align 4 diff --git a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll --- a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll +++ b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -ppc-vsr-nums-as-vr 
-mtriple=powerpc64-unknown-linux-gnu \ ; RUN: -verify-machineinstrs -ppc-asm-full-reg-names -mcpu=pwr8 -relocation-model=pic \ ; RUN: | FileCheck %s @@ -13,561 +14,823 @@ ; Function Attrs: norecurse nounwind readnone define <16 x i8> @buildc(i8 zeroext %a) { +; CHECK-LABEL: buildc: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mtvsrwz v2, r3 +; CHECK-NEXT: vspltb v2, v2, 7 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: buildc: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mtvsrd v2, r3 +; CHECK-LE-NEXT: vspltb v2, v2, 7 +; CHECK-LE-NEXT: blr entry: %splat.splatinsert = insertelement <16 x i8> undef, i8 %a, i32 0 %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer ret <16 x i8> %splat.splat -; CHECK-LABEL: buildc -; CHECK: sldi r3, r3, 56 -; CHECK: mtvsrd v2, r3 -; CHECK-LE-LABEL: buildc -; CHECK-LE: mtvsrd v2, r3 -; CHECK-LE: vspltb v2, v2, 7 } ; Function Attrs: norecurse nounwind readnone define <8 x i16> @builds(i16 zeroext %a) { +; CHECK-LABEL: builds: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mtvsrwz v2, r3 +; CHECK-NEXT: vsplth v2, v2, 3 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: builds: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mtvsrd v2, r3 +; CHECK-LE-NEXT: vsplth v2, v2, 3 +; CHECK-LE-NEXT: blr entry: %splat.splatinsert = insertelement <8 x i16> undef, i16 %a, i32 0 %splat.splat = shufflevector <8 x i16> %splat.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer ret <8 x i16> %splat.splat -; CHECK-LABEL: builds -; CHECK: sldi r3, r3, 48 -; CHECK: mtvsrd v2, r3 -; CHECK-LE-LABEL: builds -; CHECK-LE: mtvsrd v2, r3 -; CHECK-LE: vsplth v2, v2, 3 } ; Function Attrs: norecurse nounwind readnone define <4 x i32> @buildi(i32 zeroext %a) { +; CHECK-LABEL: buildi: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: xxspltw v2, vs0, 1 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: buildi: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mtfprwz f0, r3 +; CHECK-LE-NEXT: xxspltw v2, vs0, 1 +; 
CHECK-LE-NEXT: blr entry: %splat.splatinsert = insertelement <4 x i32> undef, i32 %a, i32 0 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer ret <4 x i32> %splat.splat -; CHECK-LABEL: buildi -; CHECK: mtfprwz f0, r3 -; CHECK: xxspltw v2, vs0, 1 -; CHECK-LE-LABEL: buildi -; CHECK-LE: mtfprwz f0, r3 -; CHECK-LE: xxspltw v2, vs0, 1 } ; Function Attrs: norecurse nounwind readnone define <2 x i64> @buildl(i64 %a) { +; CHECK-LABEL: buildl: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: xxspltd v2, vs0, 0 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: buildl: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mtfprd f0, r3 +; CHECK-LE-NEXT: xxspltd v2, vs0, 0 +; CHECK-LE-NEXT: blr entry: %splat.splatinsert = insertelement <2 x i64> undef, i64 %a, i32 0 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer ret <2 x i64> %splat.splat -; CHECK-LABEL: buildl -; CHECK: mtfprd f0, r3 -; CHECK-LE-LABEL: buildl -; CHECK-LE: mtfprd f0, r3 -; CHECK-LE: xxspltd v2, vs0, 0 } ; Function Attrs: norecurse nounwind readnone define <4 x float> @buildf(float %a) { +; CHECK-LABEL: buildf: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdpspn vs0, f1 +; CHECK-NEXT: xxspltw v2, vs0, 0 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: buildf: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xscvdpspn vs0, f1 +; CHECK-LE-NEXT: xxspltw v2, vs0, 0 +; CHECK-LE-NEXT: blr entry: %splat.splatinsert = insertelement <4 x float> undef, float %a, i32 0 %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer ret <4 x float> %splat.splat -; CHECK-LABEL: buildf -; CHECK: xscvdpspn vs0, f1 -; CHECK: xxspltw v2, vs0, 0 -; CHECK-LE-LABEL: buildf -; CHECK-LE: xscvdpspn vs0, f1 -; CHECK-LE: xxspltw v2, vs0, 0 } ; The optimization to remove stack operations from PPCDAGToDAGISel::Select ; should still trigger for v2f64, producing an lxvdsx. 
; Function Attrs: norecurse nounwind readonly define <2 x double> @buildd() { +; CHECK-LABEL: buildd: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addis r3, r2, .LC0@toc@ha +; CHECK-NEXT: ld r3, .LC0@toc@l(r3) +; CHECK-NEXT: lxvdsx v2, 0, r3 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: buildd: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: addis r3, r2, .LC0@toc@ha +; CHECK-LE-NEXT: ld r3, .LC0@toc@l(r3) +; CHECK-LE-NEXT: lxvdsx v2, 0, r3 +; CHECK-LE-NEXT: blr entry: %0 = load double, double* @d, align 8 %splat.splatinsert = insertelement <2 x double> undef, double %0, i32 0 %splat.splat = shufflevector <2 x double> %splat.splatinsert, <2 x double> undef, <2 x i32> zeroinitializer ret <2 x double> %splat.splat -; CHECK-LABEL: buildd -; CHECK: ld r3, .LC0@toc@l(r3) -; CHECK: lxvdsx v2, 0, r3 -; CHECK-LE-LABEL: buildd -; CHECK-LE: ld r3, .LC0@toc@l(r3) -; CHECK-LE: lxvdsx v2, 0, r3 } ; Function Attrs: norecurse nounwind readnone define signext i8 @getsc0(<16 x i8> %vsc) { +; CHECK-LABEL: getsc0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mfvsrd r3, v2 +; CHECK-NEXT: rldicl r3, r3, 8, 56 +; CHECK-NEXT: extsb r3, r3 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getsc0: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxswapd vs0, v2 +; CHECK-LE-NEXT: mffprd r3, f0 +; CHECK-LE-NEXT: clrldi r3, r3, 56 +; CHECK-LE-NEXT: extsb r3, r3 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <16 x i8> %vsc, i32 0 ret i8 %vecext -; CHECK-LABEL: @getsc0 -; CHECK: mfvsrd r3, v2 -; CHECK: rldicl r3, r3, 8, 56 -; CHECK: extsb r3, r3 -; CHECK-LE-LABEL: @getsc0 -; CHECK-LE: mffprd r3, f0 -; CHECK-LE: clrldi r3, r3, 56 -; CHECK-LE: extsb r3, r3 } ; Function Attrs: norecurse nounwind readnone define signext i8 @getsc1(<16 x i8> %vsc) { +; CHECK-LABEL: getsc1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mfvsrd r3, v2 +; CHECK-NEXT: rldicl r3, r3, 16, 56 +; CHECK-NEXT: extsb r3, r3 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getsc1: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxswapd vs0, v2 +; 
CHECK-LE-NEXT: mffprd r3, f0 +; CHECK-LE-NEXT: rldicl r3, r3, 56, 56 +; CHECK-LE-NEXT: extsb r3, r3 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <16 x i8> %vsc, i32 1 ret i8 %vecext -; CHECK-LABEL: @getsc1 -; CHECK: mfvsrd r3, v2 -; CHECK: rldicl r3, r3, 16, 56 -; CHECK: extsb r3, r3 -; CHECK-LE-LABEL: @getsc1 -; CHECK-LE: mffprd r3, f0 -; CHECK-LE: rldicl r3, r3, 56, 56 -; CHECK-LE: extsb r3, r3 } ; Function Attrs: norecurse nounwind readnone define signext i8 @getsc2(<16 x i8> %vsc) { +; CHECK-LABEL: getsc2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mfvsrd r3, v2 +; CHECK-NEXT: rldicl r3, r3, 24, 56 +; CHECK-NEXT: extsb r3, r3 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getsc2: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxswapd vs0, v2 +; CHECK-LE-NEXT: mffprd r3, f0 +; CHECK-LE-NEXT: rldicl r3, r3, 48, 56 +; CHECK-LE-NEXT: extsb r3, r3 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <16 x i8> %vsc, i32 2 ret i8 %vecext -; CHECK-LABEL: @getsc2 -; CHECK: mfvsrd r3, v2 -; CHECK: rldicl r3, r3, 24, 56 -; CHECK: extsb r3, r3 -; CHECK-LE-LABEL: @getsc2 -; CHECK-LE: mffprd r3, f0 -; CHECK-LE: rldicl r3, r3, 48, 56 -; CHECK-LE: extsb r3, r3 } ; Function Attrs: norecurse nounwind readnone define signext i8 @getsc3(<16 x i8> %vsc) { +; CHECK-LABEL: getsc3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mfvsrd r3, v2 +; CHECK-NEXT: rldicl r3, r3, 32, 56 +; CHECK-NEXT: extsb r3, r3 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getsc3: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxswapd vs0, v2 +; CHECK-LE-NEXT: mffprd r3, f0 +; CHECK-LE-NEXT: rldicl r3, r3, 40, 56 +; CHECK-LE-NEXT: extsb r3, r3 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <16 x i8> %vsc, i32 3 ret i8 %vecext -; CHECK-LABEL: @getsc3 -; CHECK: mfvsrd r3, v2 -; CHECK: rldicl r3, r3, 32, 56 -; CHECK: extsb r3, r3 -; CHECK-LE-LABEL: @getsc3 -; CHECK-LE: mffprd r3, f0 -; CHECK-LE: rldicl r3, r3, 40, 56 -; CHECK-LE: extsb r3, r3 } ; Function Attrs: norecurse nounwind readnone define signext 
i8 @getsc4(<16 x i8> %vsc) { +; CHECK-LABEL: getsc4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mfvsrd r3, v2 +; CHECK-NEXT: rldicl r3, r3, 40, 56 +; CHECK-NEXT: extsb r3, r3 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getsc4: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxswapd vs0, v2 +; CHECK-LE-NEXT: mffprd r3, f0 +; CHECK-LE-NEXT: rldicl r3, r3, 32, 56 +; CHECK-LE-NEXT: extsb r3, r3 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <16 x i8> %vsc, i32 4 ret i8 %vecext -; CHECK-LABEL: @getsc4 -; CHECK: mfvsrd r3, v2 -; CHECK: rldicl r3, r3, 40, 56 -; CHECK: extsb r3, r3 -; CHECK-LE-LABEL: @getsc4 -; CHECK-LE: mffprd r3, f0 -; CHECK-LE: rldicl r3, r3, 32, 56 -; CHECK-LE: extsb r3, r3 } ; Function Attrs: norecurse nounwind readnone define signext i8 @getsc5(<16 x i8> %vsc) { +; CHECK-LABEL: getsc5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mfvsrd r3, v2 +; CHECK-NEXT: rldicl r3, r3, 48, 56 +; CHECK-NEXT: extsb r3, r3 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getsc5: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxswapd vs0, v2 +; CHECK-LE-NEXT: mffprd r3, f0 +; CHECK-LE-NEXT: rldicl r3, r3, 24, 56 +; CHECK-LE-NEXT: extsb r3, r3 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <16 x i8> %vsc, i32 5 ret i8 %vecext -; CHECK-LABEL: @getsc5 -; CHECK: mfvsrd r3, v2 -; CHECK: rldicl r3, r3, 48, 56 -; CHECK: extsb r3, r3 -; CHECK-LE-LABEL: @getsc5 -; CHECK-LE: mffprd r3, f0 -; CHECK-LE: rldicl r3, r3, 24, 56 -; CHECK-LE: extsb r3, r3 } ; Function Attrs: norecurse nounwind readnone define signext i8 @getsc6(<16 x i8> %vsc) { +; CHECK-LABEL: getsc6: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mfvsrd r3, v2 +; CHECK-NEXT: rldicl r3, r3, 56, 56 +; CHECK-NEXT: extsb r3, r3 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getsc6: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxswapd vs0, v2 +; CHECK-LE-NEXT: mffprd r3, f0 +; CHECK-LE-NEXT: rldicl r3, r3, 16, 56 +; CHECK-LE-NEXT: extsb r3, r3 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <16 x i8> %vsc, i32 6 ret 
i8 %vecext -; CHECK-LABEL: @getsc6 -; CHECK: mfvsrd r3, v2 -; CHECK: rldicl r3, r3, 56, 56 -; CHECK: extsb r3, r3 -; CHECK-LE-LABEL: @getsc6 -; CHECK-LE: mffprd r3, f0 -; CHECK-LE: rldicl r3, r3, 16, 56 -; CHECK-LE: extsb r3, r3 } ; Function Attrs: norecurse nounwind readnone define signext i8 @getsc7(<16 x i8> %vsc) { +; CHECK-LABEL: getsc7: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mfvsrd r3, v2 +; CHECK-NEXT: clrldi r3, r3, 56 +; CHECK-NEXT: extsb r3, r3 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getsc7: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxswapd vs0, v2 +; CHECK-LE-NEXT: mffprd r3, f0 +; CHECK-LE-NEXT: rldicl r3, r3, 8, 56 +; CHECK-LE-NEXT: extsb r3, r3 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <16 x i8> %vsc, i32 7 ret i8 %vecext -; CHECK-LABEL: @getsc7 -; CHECK: mfvsrd r3, v2 -; CHECK: clrldi r3, r3, 56 -; CHECK: extsb r3, r3 -; CHECK-LE-LABEL: @getsc7 -; CHECK-LE: mffprd r3, f0 -; CHECK-LE: rldicl r3, r3, 8, 56 -; CHECK-LE: extsb r3, r3 } ; Function Attrs: norecurse nounwind readnone define signext i8 @getsc8(<16 x i8> %vsc) { +; CHECK-LABEL: getsc8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd vs0, v2 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: rldicl r3, r3, 8, 56 +; CHECK-NEXT: extsb r3, r3 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getsc8: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mfvsrd r3, v2 +; CHECK-LE-NEXT: clrldi r3, r3, 56 +; CHECK-LE-NEXT: extsb r3, r3 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <16 x i8> %vsc, i32 8 ret i8 %vecext -; CHECK-LABEL: @getsc8 -; CHECK: mffprd r3, f0 -; CHECK: rldicl r3, r3, 8, 56 -; CHECK: extsb r3, r3 -; CHECK-LE-LABEL: @getsc8 -; CHECK-LE: mfvsrd r3, v2 -; CHECK-LE: clrldi r3, r3, 56 -; CHECK-LE: extsb r3, r3 } ; Function Attrs: norecurse nounwind readnone define signext i8 @getsc9(<16 x i8> %vsc) { +; CHECK-LABEL: getsc9: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd vs0, v2 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: rldicl r3, r3, 16, 56 +; CHECK-NEXT: extsb r3, 
r3 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getsc9: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mfvsrd r3, v2 +; CHECK-LE-NEXT: rldicl r3, r3, 56, 56 +; CHECK-LE-NEXT: extsb r3, r3 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <16 x i8> %vsc, i32 9 ret i8 %vecext -; CHECK-LABEL: @getsc9 -; CHECK: mffprd r3, f0 -; CHECK: rldicl r3, r3, 16, 56 -; CHECK: extsb r3, r3 -; CHECK-LE-LABEL: @getsc9 -; CHECK-LE: mfvsrd r3, v2 -; CHECK-LE: rldicl r3, r3, 56, 56 -; CHECK-LE: extsb r3, r3 } ; Function Attrs: norecurse nounwind readnone define signext i8 @getsc10(<16 x i8> %vsc) { +; CHECK-LABEL: getsc10: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd vs0, v2 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: rldicl r3, r3, 24, 56 +; CHECK-NEXT: extsb r3, r3 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getsc10: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mfvsrd r3, v2 +; CHECK-LE-NEXT: rldicl r3, r3, 48, 56 +; CHECK-LE-NEXT: extsb r3, r3 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <16 x i8> %vsc, i32 10 ret i8 %vecext -; CHECK-LABEL: @getsc10 -; CHECK: mffprd r3, f0 -; CHECK: rldicl r3, r3, 24, 56 -; CHECK: extsb r3, r3 -; CHECK-LE-LABEL: @getsc10 -; CHECK-LE: mfvsrd r3, v2 -; CHECK-LE: rldicl r3, r3, 48, 56 -; CHECK-LE: extsb r3, r3 } ; Function Attrs: norecurse nounwind readnone define signext i8 @getsc11(<16 x i8> %vsc) { +; CHECK-LABEL: getsc11: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd vs0, v2 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: rldicl r3, r3, 32, 56 +; CHECK-NEXT: extsb r3, r3 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getsc11: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mfvsrd r3, v2 +; CHECK-LE-NEXT: rldicl r3, r3, 40, 56 +; CHECK-LE-NEXT: extsb r3, r3 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <16 x i8> %vsc, i32 11 ret i8 %vecext -; CHECK-LABEL: @getsc11 -; CHECK: mffprd r3, f0 -; CHECK: rldicl r3, r3, 32, 56 -; CHECK: extsb r3, r3 -; CHECK-LE-LABEL: @getsc11 -; CHECK-LE: mfvsrd r3, v2 -; CHECK-LE: rldicl r3, r3, 40, 
56 -; CHECK-LE: extsb r3, r3 } ; Function Attrs: norecurse nounwind readnone define signext i8 @getsc12(<16 x i8> %vsc) { +; CHECK-LABEL: getsc12: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd vs0, v2 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: rldicl r3, r3, 40, 56 +; CHECK-NEXT: extsb r3, r3 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getsc12: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mfvsrd r3, v2 +; CHECK-LE-NEXT: rldicl r3, r3, 32, 56 +; CHECK-LE-NEXT: extsb r3, r3 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <16 x i8> %vsc, i32 12 ret i8 %vecext -; CHECK-LABEL: @getsc12 -; CHECK: mffprd r3, f0 -; CHECK: rldicl r3, r3, 40, 56 -; CHECK: extsb r3, r3 -; CHECK-LE-LABEL: @getsc12 -; CHECK-LE: mfvsrd r3, v2 -; CHECK-LE: rldicl r3, r3, 32, 56 -; CHECK-LE: extsb r3, r3 } ; Function Attrs: norecurse nounwind readnone define signext i8 @getsc13(<16 x i8> %vsc) { +; CHECK-LABEL: getsc13: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd vs0, v2 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: rldicl r3, r3, 48, 56 +; CHECK-NEXT: extsb r3, r3 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getsc13: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mfvsrd r3, v2 +; CHECK-LE-NEXT: rldicl r3, r3, 24, 56 +; CHECK-LE-NEXT: extsb r3, r3 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <16 x i8> %vsc, i32 13 ret i8 %vecext -; CHECK-LABEL: @getsc13 -; CHECK: mffprd r3, f0 -; CHECK: rldicl r3, r3, 48, 56 -; CHECK: extsb r3, r3 -; CHECK-LE-LABEL: @getsc13 -; CHECK-LE: mfvsrd r3, v2 -; CHECK-LE: rldicl r3, r3, 24, 56 -; CHECK-LE: extsb r3, r3 } ; Function Attrs: norecurse nounwind readnone define signext i8 @getsc14(<16 x i8> %vsc) { +; CHECK-LABEL: getsc14: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd vs0, v2 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: rldicl r3, r3, 56, 56 +; CHECK-NEXT: extsb r3, r3 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getsc14: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mfvsrd r3, v2 +; CHECK-LE-NEXT: rldicl r3, r3, 16, 56 +; 
CHECK-LE-NEXT: extsb r3, r3 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <16 x i8> %vsc, i32 14 ret i8 %vecext -; CHECK-LABEL: @getsc14 -; CHECK: mffprd r3, f0 -; CHECK: rldicl r3, r3, 56, 56 -; CHECK: extsb r3, r3 -; CHECK-LE-LABEL: @getsc14 -; CHECK-LE: mfvsrd r3, v2 -; CHECK-LE: rldicl r3, r3, 16, 56 -; CHECK-LE: extsb r3, r3 } ; Function Attrs: norecurse nounwind readnone define signext i8 @getsc15(<16 x i8> %vsc) { +; CHECK-LABEL: getsc15: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd vs0, v2 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: clrldi r3, r3, 56 +; CHECK-NEXT: extsb r3, r3 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getsc15: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mfvsrd r3, v2 +; CHECK-LE-NEXT: rldicl r3, r3, 8, 56 +; CHECK-LE-NEXT: extsb r3, r3 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <16 x i8> %vsc, i32 15 ret i8 %vecext -; CHECK-LABEL: @getsc15 -; CHECK: mffprd r3, f0 -; CHECK: clrldi r3, r3, 56 -; CHECK: extsb r3, r3 -; CHECK-LE-LABEL: @getsc15 -; CHECK-LE: mfvsrd r3, v2 -; CHECK-LE: rldicl r3, r3, 8, 56 -; CHECK-LE: extsb r3, r3 } ; Function Attrs: norecurse nounwind readnone define zeroext i8 @getuc0(<16 x i8> %vuc) { +; CHECK-LABEL: getuc0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mfvsrd r3, v2 +; CHECK-NEXT: rldicl r3, r3, 8, 56 +; CHECK-NEXT: clrldi r3, r3, 56 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getuc0: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxswapd vs0, v2 +; CHECK-LE-NEXT: mffprd r3, f0 +; CHECK-LE-NEXT: clrldi r3, r3, 56 +; CHECK-LE-NEXT: clrldi r3, r3, 56 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <16 x i8> %vuc, i32 0 ret i8 %vecext -; CHECK-LABEL: @getuc0 -; CHECK: mfvsrd r3, v2 -; CHECK: rldicl r3, r3, 8, 56 -; CHECK-LE-LABEL: @getuc0 -; CHECK-LE: mffprd r3, f0 -; CHECK-LE: clrldi r3, r3, 56 } ; Function Attrs: norecurse nounwind readnone define zeroext i8 @getuc1(<16 x i8> %vuc) { +; CHECK-LABEL: getuc1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mfvsrd r3, v2 +; CHECK-NEXT: 
rldicl r3, r3, 16, 56 +; CHECK-NEXT: clrldi r3, r3, 56 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getuc1: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxswapd vs0, v2 +; CHECK-LE-NEXT: mffprd r3, f0 +; CHECK-LE-NEXT: rldicl r3, r3, 56, 56 +; CHECK-LE-NEXT: clrldi r3, r3, 56 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <16 x i8> %vuc, i32 1 ret i8 %vecext -; CHECK-LABEL: @getuc1 -; CHECK: mfvsrd r3, v2 -; CHECK: rldicl r3, r3, 16, 56 -; CHECK-LE-LABEL: @getuc1 -; CHECK-LE: mffprd r3, f0 -; CHECK-LE: rldicl r3, r3, 56, 56 } ; Function Attrs: norecurse nounwind readnone define zeroext i8 @getuc2(<16 x i8> %vuc) { +; CHECK-LABEL: getuc2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mfvsrd r3, v2 +; CHECK-NEXT: rldicl r3, r3, 24, 56 +; CHECK-NEXT: clrldi r3, r3, 56 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getuc2: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxswapd vs0, v2 +; CHECK-LE-NEXT: mffprd r3, f0 +; CHECK-LE-NEXT: rldicl r3, r3, 48, 56 +; CHECK-LE-NEXT: clrldi r3, r3, 56 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <16 x i8> %vuc, i32 2 ret i8 %vecext -; CHECK-LABEL: @getuc2 -; CHECK: mfvsrd r3, v2 -; CHECK: rldicl r3, r3, 24, 56 -; CHECK-LE-LABEL: @getuc2 -; CHECK-LE: mffprd r3, f0 -; CHECK-LE: rldicl r3, r3, 48, 56 } ; Function Attrs: norecurse nounwind readnone define zeroext i8 @getuc3(<16 x i8> %vuc) { +; CHECK-LABEL: getuc3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mfvsrd r3, v2 +; CHECK-NEXT: rldicl r3, r3, 32, 56 +; CHECK-NEXT: clrldi r3, r3, 56 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getuc3: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxswapd vs0, v2 +; CHECK-LE-NEXT: mffprd r3, f0 +; CHECK-LE-NEXT: rldicl r3, r3, 40, 56 +; CHECK-LE-NEXT: clrldi r3, r3, 56 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <16 x i8> %vuc, i32 3 ret i8 %vecext -; CHECK-LABEL: @getuc3 -; CHECK: mfvsrd r3, v2 -; CHECK: rldicl r3, r3, 32, 56 -; CHECK-LE-LABEL: @getuc3 -; CHECK-LE: mffprd r3, f0 -; CHECK-LE: rldicl r3, r3, 40, 56 } ; Function 
Attrs: norecurse nounwind readnone define zeroext i8 @getuc4(<16 x i8> %vuc) { +; CHECK-LABEL: getuc4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mfvsrd r3, v2 +; CHECK-NEXT: rldicl r3, r3, 40, 56 +; CHECK-NEXT: clrldi r3, r3, 56 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getuc4: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxswapd vs0, v2 +; CHECK-LE-NEXT: mffprd r3, f0 +; CHECK-LE-NEXT: rldicl r3, r3, 32, 56 +; CHECK-LE-NEXT: clrldi r3, r3, 56 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <16 x i8> %vuc, i32 4 ret i8 %vecext -; CHECK-LABEL: @getuc4 -; CHECK: mfvsrd r3, v2 -; CHECK: rldicl r3, r3, 40, 56 -; CHECK-LE-LABEL: @getuc4 -; CHECK-LE: mffprd r3, f0 -; CHECK-LE: rldicl r3, r3, 32, 56 } ; Function Attrs: norecurse nounwind readnone define zeroext i8 @getuc5(<16 x i8> %vuc) { +; CHECK-LABEL: getuc5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mfvsrd r3, v2 +; CHECK-NEXT: rldicl r3, r3, 48, 56 +; CHECK-NEXT: clrldi r3, r3, 56 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getuc5: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxswapd vs0, v2 +; CHECK-LE-NEXT: mffprd r3, f0 +; CHECK-LE-NEXT: rldicl r3, r3, 24, 56 +; CHECK-LE-NEXT: clrldi r3, r3, 56 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <16 x i8> %vuc, i32 5 ret i8 %vecext -; CHECK-LABEL: @getuc5 -; CHECK: mfvsrd r3, v2 -; CHECK: rldicl r3, r3, 48, 56 -; CHECK-LE-LABEL: @getuc5 -; CHECK-LE: mffprd r3, f0 -; CHECK-LE: rldicl r3, r3, 24, 56 } ; Function Attrs: norecurse nounwind readnone define zeroext i8 @getuc6(<16 x i8> %vuc) { +; CHECK-LABEL: getuc6: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mfvsrd r3, v2 +; CHECK-NEXT: rldicl r3, r3, 56, 56 +; CHECK-NEXT: clrldi r3, r3, 56 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getuc6: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxswapd vs0, v2 +; CHECK-LE-NEXT: mffprd r3, f0 +; CHECK-LE-NEXT: rldicl r3, r3, 16, 56 +; CHECK-LE-NEXT: clrldi r3, r3, 56 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <16 x i8> %vuc, i32 6 ret i8 %vecext -; 
CHECK-LABEL: @getuc6 -; CHECK: mfvsrd r3, v2 -; CHECK: rldicl r3, r3, 56, 56 -; CHECK-LE-LABEL: @getuc6 -; CHECK-LE: mffprd r3, f0 -; CHECK-LE: rldicl r3, r3, 16, 56 } ; Function Attrs: norecurse nounwind readnone define zeroext i8 @getuc7(<16 x i8> %vuc) { +; CHECK-LABEL: getuc7: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mfvsrd r3, v2 +; CHECK-NEXT: clrldi r3, r3, 56 +; CHECK-NEXT: clrldi r3, r3, 56 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getuc7: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxswapd vs0, v2 +; CHECK-LE-NEXT: mffprd r3, f0 +; CHECK-LE-NEXT: rldicl r3, r3, 8, 56 +; CHECK-LE-NEXT: clrldi r3, r3, 56 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <16 x i8> %vuc, i32 7 ret i8 %vecext -; CHECK-LABEL: @getuc7 -; CHECK: mfvsrd r3, v2 -; CHECK: clrldi r3, r3, 56 -; CHECK-LE-LABEL: @getuc7 -; CHECK-LE: mffprd r3, f0 -; CHECK-LE: rldicl r3, r3, 8, 56 } ; Function Attrs: norecurse nounwind readnone define zeroext i8 @getuc8(<16 x i8> %vuc) { +; CHECK-LABEL: getuc8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd vs0, v2 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: rldicl r3, r3, 8, 56 +; CHECK-NEXT: clrldi r3, r3, 56 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getuc8: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mfvsrd r3, v2 +; CHECK-LE-NEXT: clrldi r3, r3, 56 +; CHECK-LE-NEXT: clrldi r3, r3, 56 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <16 x i8> %vuc, i32 8 ret i8 %vecext -; CHECK-LABEL: @getuc8 -; CHECK: mffprd r3, f0 -; CHECK: rldicl r3, r3, 8, 56 -; CHECK-LE-LABEL: @getuc8 -; CHECK-LE: mfvsrd r3, v2 -; CHECK-LE: clrldi r3, r3, 56 } ; Function Attrs: norecurse nounwind readnone define zeroext i8 @getuc9(<16 x i8> %vuc) { +; CHECK-LABEL: getuc9: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd vs0, v2 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: rldicl r3, r3, 16, 56 +; CHECK-NEXT: clrldi r3, r3, 56 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getuc9: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mfvsrd r3, v2 +; CHECK-LE-NEXT: 
rldicl r3, r3, 56, 56 +; CHECK-LE-NEXT: clrldi r3, r3, 56 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <16 x i8> %vuc, i32 9 ret i8 %vecext -; CHECK-LABEL: @getuc9 -; CHECK: mffprd r3, f0 -; CHECK: rldicl r3, r3, 16, 56 -; CHECK-LE-LABEL: @getuc9 -; CHECK-LE: mfvsrd r3, v2 -; CHECK-LE: rldicl r3, r3, 56, 56 } ; Function Attrs: norecurse nounwind readnone define zeroext i8 @getuc10(<16 x i8> %vuc) { +; CHECK-LABEL: getuc10: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd vs0, v2 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: rldicl r3, r3, 24, 56 +; CHECK-NEXT: clrldi r3, r3, 56 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getuc10: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mfvsrd r3, v2 +; CHECK-LE-NEXT: rldicl r3, r3, 48, 56 +; CHECK-LE-NEXT: clrldi r3, r3, 56 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <16 x i8> %vuc, i32 10 ret i8 %vecext -; CHECK-LABEL: @getuc10 -; CHECK: mffprd r3, f0 -; CHECK: rldicl r3, r3, 24, 56 -; CHECK-LE-LABEL: @getuc10 -; CHECK-LE: mfvsrd r3, v2 -; CHECK-LE: rldicl r3, r3, 48, 56 } ; Function Attrs: norecurse nounwind readnone define zeroext i8 @getuc11(<16 x i8> %vuc) { +; CHECK-LABEL: getuc11: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd vs0, v2 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: rldicl r3, r3, 32, 56 +; CHECK-NEXT: clrldi r3, r3, 56 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getuc11: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mfvsrd r3, v2 +; CHECK-LE-NEXT: rldicl r3, r3, 40, 56 +; CHECK-LE-NEXT: clrldi r3, r3, 56 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <16 x i8> %vuc, i32 11 ret i8 %vecext -; CHECK-LABEL: @getuc11 -; CHECK: mffprd r3, f0 -; CHECK: rldicl r3, r3, 32, 56 -; CHECK-LE-LABEL: @getuc11 -; CHECK-LE: mfvsrd r3, v2 -; CHECK-LE: rldicl r3, r3, 40, 56 } ; Function Attrs: norecurse nounwind readnone define zeroext i8 @getuc12(<16 x i8> %vuc) { +; CHECK-LABEL: getuc12: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd vs0, v2 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: 
rldicl r3, r3, 40, 56 +; CHECK-NEXT: clrldi r3, r3, 56 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getuc12: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mfvsrd r3, v2 +; CHECK-LE-NEXT: rldicl r3, r3, 32, 56 +; CHECK-LE-NEXT: clrldi r3, r3, 56 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <16 x i8> %vuc, i32 12 ret i8 %vecext -; CHECK-LABEL: @getuc12 -; CHECK: mffprd r3, f0 -; CHECK: rldicl r3, r3, 40, 56 -; CHECK-LE-LABEL: @getuc12 -; CHECK-LE: mfvsrd r3, v2 -; CHECK-LE: rldicl r3, r3, 32, 56 } ; Function Attrs: norecurse nounwind readnone define zeroext i8 @getuc13(<16 x i8> %vuc) { +; CHECK-LABEL: getuc13: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd vs0, v2 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: rldicl r3, r3, 48, 56 +; CHECK-NEXT: clrldi r3, r3, 56 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getuc13: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mfvsrd r3, v2 +; CHECK-LE-NEXT: rldicl r3, r3, 24, 56 +; CHECK-LE-NEXT: clrldi r3, r3, 56 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <16 x i8> %vuc, i32 13 ret i8 %vecext -; CHECK-LABEL: @getuc13 -; CHECK: mffprd r3, f0 -; CHECK: rldicl r3, r3, 48, 56 -; CHECK-LE-LABEL: @getuc13 -; CHECK-LE: mfvsrd r3, v2 -; CHECK-LE: rldicl r3, r3, 24, 56 } ; Function Attrs: norecurse nounwind readnone define zeroext i8 @getuc14(<16 x i8> %vuc) { +; CHECK-LABEL: getuc14: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd vs0, v2 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: rldicl r3, r3, 56, 56 +; CHECK-NEXT: clrldi r3, r3, 56 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getuc14: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mfvsrd r3, v2 +; CHECK-LE-NEXT: rldicl r3, r3, 16, 56 +; CHECK-LE-NEXT: clrldi r3, r3, 56 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <16 x i8> %vuc, i32 14 ret i8 %vecext -; CHECK-LABEL: @getuc14 -; CHECK: mffprd r3, f0 -; CHECK: rldicl r3, r3, 56, 56 -; CHECK-LE-LABEL: @getuc14 -; CHECK-LE: mfvsrd r3, v2 -; CHECK-LE: rldicl r3, r3, 16, 56 } ; Function Attrs: norecurse 
nounwind readnone define zeroext i8 @getuc15(<16 x i8> %vuc) { +; CHECK-LABEL: getuc15: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd vs0, v2 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: clrldi r3, r3, 56 +; CHECK-NEXT: clrldi r3, r3, 56 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getuc15: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mfvsrd r3, v2 +; CHECK-LE-NEXT: rldicl r3, r3, 8, 56 +; CHECK-LE-NEXT: clrldi r3, r3, 56 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <16 x i8> %vuc, i32 15 ret i8 %vecext -; CHECK-LABEL: @getuc15 -; CHECK: mffprd r3, f0 -; CHECK: clrldi r3, r3, 56 -; CHECK-LE-LABEL: @getuc15 -; CHECK-LE: mfvsrd r3, v2 -; CHECK-LE: rldicl r3, r3, 8, 56 } ; Function Attrs: norecurse nounwind readnone define signext i8 @getvelsc(<16 x i8> %vsc, i32 signext %i) { -; CHECK-LABEL: @getvelsc -; CHECK: andi. r4, r5, 8 -; CHECK: li r3, 7 -; CHECK: lvsl v3, 0, r4 -; CHECK: andc r3, r3, r5 -; CHECK: sldi r3, r3, 3 -; CHECK: vperm v2, v2, v2, v3 -; CHECK: mfvsrd r4, v2 -; CHECK: srd r3, r4, r3 -; CHECK: extsb r3, r3 -; CHECK-LE-LABEL: @getvelsc -; CHECK-LE: li r3, 8 -; CHECK-LE: andc r3, r3, r5 -; CHECK-LE: lvsl v3, 0, r3 -; CHECK-LE: li r3, 7 -; CHECK-LE: and r3, r3, r5 -; CHECK-LE: vperm v2, v2, v2, v3 -; CHECK-LE: sldi r3, r3, 3 -; CHECK-LE: mfvsrd r4, v2 -; CHECK-LE: srd r3, r4, r3 -; CHECK-LE: extsb r3, r3 +; CHECK-LABEL: getvelsc: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: andi. 
r4, r5, 8 +; CHECK-NEXT: li r3, 7 +; CHECK-NEXT: lvsl v3, 0, r4 +; CHECK-NEXT: andc r3, r3, r5 +; CHECK-NEXT: sldi r3, r3, 3 +; CHECK-NEXT: vperm v2, v2, v2, v3 +; CHECK-NEXT: mfvsrd r4, v2 +; CHECK-NEXT: srd r3, r4, r3 +; CHECK-NEXT: extsb r3, r3 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getvelsc: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: li r3, 8 +; CHECK-LE-NEXT: andc r3, r3, r5 +; CHECK-LE-NEXT: lvsl v3, 0, r3 +; CHECK-LE-NEXT: li r3, 7 +; CHECK-LE-NEXT: and r3, r3, r5 +; CHECK-LE-NEXT: vperm v2, v2, v2, v3 +; CHECK-LE-NEXT: sldi r3, r3, 3 +; CHECK-LE-NEXT: mfvsrd r4, v2 +; CHECK-LE-NEXT: srd r3, r4, r3 +; CHECK-LE-NEXT: extsb r3, r3 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <16 x i8> %vsc, i32 %i ret i8 %vecext @@ -575,27 +838,32 @@ ; Function Attrs: norecurse nounwind readnone define zeroext i8 @getveluc(<16 x i8> %vuc, i32 signext %i) { -; CHECK-LABEL: @getveluc -; CHECK: andi. r4, r5, 8 -; CHECK: li r3, 7 -; CHECK: lvsl v3, 0, r4 -; CHECK: andc r3, r3, r5 -; CHECK: sldi r3, r3, 3 -; CHECK: vperm v2, v2, v2, v3 -; CHECK: mfvsrd r4, v2 -; CHECK: srd r3, r4, r3 -; CHECK: clrldi r3, r3, 5 -; CHECK-LE-LABEL: @getveluc -; CHECK-LE: li r3, 8 -; CHECK-LE: andc r3, r3, r5 -; CHECK-LE: lvsl v3, 0, r3 -; CHECK-LE: li r3, 7 -; CHECK-LE: and r3, r3, r5 -; CHECK-LE: vperm v2, v2, v2, v3 -; CHECK-LE: sldi r3, r3, 3 -; CHECK-LE: mfvsrd r4, v2 -; CHECK-LE: srd r3, r4, r3 -; CHECK-LE: clrldi r3, r3, 56 +; CHECK-LABEL: getveluc: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: andi. 
r4, r5, 8 +; CHECK-NEXT: li r3, 7 +; CHECK-NEXT: lvsl v3, 0, r4 +; CHECK-NEXT: andc r3, r3, r5 +; CHECK-NEXT: sldi r3, r3, 3 +; CHECK-NEXT: vperm v2, v2, v2, v3 +; CHECK-NEXT: mfvsrd r4, v2 +; CHECK-NEXT: srd r3, r4, r3 +; CHECK-NEXT: clrldi r3, r3, 56 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getveluc: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: li r3, 8 +; CHECK-LE-NEXT: andc r3, r3, r5 +; CHECK-LE-NEXT: lvsl v3, 0, r3 +; CHECK-LE-NEXT: li r3, 7 +; CHECK-LE-NEXT: and r3, r3, r5 +; CHECK-LE-NEXT: vperm v2, v2, v2, v3 +; CHECK-LE-NEXT: sldi r3, r3, 3 +; CHECK-LE-NEXT: mfvsrd r4, v2 +; CHECK-LE-NEXT: srd r3, r4, r3 +; CHECK-LE-NEXT: clrldi r3, r3, 56 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <16 x i8> %vuc, i32 %i ret i8 %vecext @@ -603,253 +871,370 @@ ; Function Attrs: norecurse nounwind readnone define signext i16 @getss0(<8 x i16> %vss) { +; CHECK-LABEL: getss0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mfvsrd r3, v2 +; CHECK-NEXT: rldicl r3, r3, 16, 48 +; CHECK-NEXT: extsh r3, r3 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getss0: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxswapd vs0, v2 +; CHECK-LE-NEXT: mffprd r3, f0 +; CHECK-LE-NEXT: clrldi r3, r3, 48 +; CHECK-LE-NEXT: extsh r3, r3 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <8 x i16> %vss, i32 0 ret i16 %vecext -; CHECK-LABEL: @getss0 -; CHECK: mfvsrd r3, v2 -; CHECK: rldicl r3, r3, 16, 48 -; CHECK: extsh r3, r3 -; CHECK-LE-LABEL: @getss0 -; CHECK-LE: mffprd r3, f0 -; CHECK-LE: clrldi r3, r3, 48 -; CHECK-LE: extsh r3, r3 } ; Function Attrs: norecurse nounwind readnone define signext i16 @getss1(<8 x i16> %vss) { +; CHECK-LABEL: getss1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mfvsrd r3, v2 +; CHECK-NEXT: rldicl r3, r3, 32, 48 +; CHECK-NEXT: extsh r3, r3 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getss1: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxswapd vs0, v2 +; CHECK-LE-NEXT: mffprd r3, f0 +; CHECK-LE-NEXT: rldicl r3, r3, 48, 48 +; CHECK-LE-NEXT: extsh r3, r3 +; 
CHECK-LE-NEXT: blr entry: %vecext = extractelement <8 x i16> %vss, i32 1 ret i16 %vecext -; CHECK-LABEL: @getss1 -; CHECK: mfvsrd r3, v2 -; CHECK: rldicl r3, r3, 32, 48 -; CHECK: extsh r3, r3 -; CHECK-LE-LABEL: @getss1 -; CHECK-LE: mffprd r3, f0 -; CHECK-LE: rldicl r3, r3, 48, 48 -; CHECK-LE: extsh r3, r3 } ; Function Attrs: norecurse nounwind readnone define signext i16 @getss2(<8 x i16> %vss) { +; CHECK-LABEL: getss2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mfvsrd r3, v2 +; CHECK-NEXT: rldicl r3, r3, 48, 48 +; CHECK-NEXT: extsh r3, r3 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getss2: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxswapd vs0, v2 +; CHECK-LE-NEXT: mffprd r3, f0 +; CHECK-LE-NEXT: rldicl r3, r3, 32, 48 +; CHECK-LE-NEXT: extsh r3, r3 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <8 x i16> %vss, i32 2 ret i16 %vecext -; CHECK-LABEL: @getss2 -; CHECK: mfvsrd r3, v2 -; CHECK: rldicl r3, r3, 48, 48 -; CHECK: extsh r3, r3 -; CHECK-LE-LABEL: @getss2 -; CHECK-LE: mffprd r3, f0 -; CHECK-LE: rldicl r3, r3, 32, 48 -; CHECK-LE: extsh r3, r3 } ; Function Attrs: norecurse nounwind readnone define signext i16 @getss3(<8 x i16> %vss) { +; CHECK-LABEL: getss3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mfvsrd r3, v2 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: extsh r3, r3 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getss3: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxswapd vs0, v2 +; CHECK-LE-NEXT: mffprd r3, f0 +; CHECK-LE-NEXT: rldicl r3, r3, 16, 48 +; CHECK-LE-NEXT: extsh r3, r3 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <8 x i16> %vss, i32 3 ret i16 %vecext -; CHECK-LABEL: @getss3 -; CHECK: mfvsrd r3, v2 -; CHECK: clrldi r3, r3, 48 -; CHECK: extsh r3, r3 -; CHECK-LE-LABEL: @getss3 -; CHECK-LE: mffprd r3, f0 -; CHECK-LE: rldicl r3, r3, 16, 48 -; CHECK-LE: extsh r3, r3 } ; Function Attrs: norecurse nounwind readnone define signext i16 @getss4(<8 x i16> %vss) { +; CHECK-LABEL: getss4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd 
vs0, v2 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: rldicl r3, r3, 16, 48 +; CHECK-NEXT: extsh r3, r3 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getss4: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mfvsrd r3, v2 +; CHECK-LE-NEXT: clrldi r3, r3, 48 +; CHECK-LE-NEXT: extsh r3, r3 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <8 x i16> %vss, i32 4 ret i16 %vecext -; CHECK-LABEL: @getss4 -; CHECK: mffprd r3, f0 -; CHECK: rldicl r3, r3, 16, 48 -; CHECK: extsh r3, r3 -; CHECK-LE-LABEL: @getss4 -; CHECK-LE: mfvsrd r3, v2 -; CHECK-LE: clrldi r3, r3, 48 -; CHECK-LE: extsh r3, r3 } ; Function Attrs: norecurse nounwind readnone define signext i16 @getss5(<8 x i16> %vss) { +; CHECK-LABEL: getss5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd vs0, v2 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: rldicl r3, r3, 32, 48 +; CHECK-NEXT: extsh r3, r3 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getss5: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mfvsrd r3, v2 +; CHECK-LE-NEXT: rldicl r3, r3, 48, 48 +; CHECK-LE-NEXT: extsh r3, r3 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <8 x i16> %vss, i32 5 ret i16 %vecext -; CHECK-LABEL: @getss5 -; CHECK: mffprd r3, f0 -; CHECK: rldicl r3, r3, 32, 48 -; CHECK: extsh r3, r3 -; CHECK-LE-LABEL: @getss5 -; CHECK-LE: mfvsrd r3, v2 -; CHECK-LE: rldicl r3, r3, 48, 48 -; CHECK-LE: extsh r3, r3 } ; Function Attrs: norecurse nounwind readnone define signext i16 @getss6(<8 x i16> %vss) { +; CHECK-LABEL: getss6: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd vs0, v2 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: rldicl r3, r3, 48, 48 +; CHECK-NEXT: extsh r3, r3 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getss6: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mfvsrd r3, v2 +; CHECK-LE-NEXT: rldicl r3, r3, 32, 48 +; CHECK-LE-NEXT: extsh r3, r3 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <8 x i16> %vss, i32 6 ret i16 %vecext -; CHECK-LABEL: @getss6 -; CHECK: mffprd r3, f0 -; CHECK: rldicl r3, r3, 48, 48 -; CHECK: extsh r3, r3 
-; CHECK-LE-LABEL: @getss6 -; CHECK-LE: mfvsrd r3, v2 -; CHECK-LE: rldicl r3, r3, 32, 48 -; CHECK-LE: extsh r3, r3 } ; Function Attrs: norecurse nounwind readnone define signext i16 @getss7(<8 x i16> %vss) { +; CHECK-LABEL: getss7: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd vs0, v2 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: extsh r3, r3 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getss7: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mfvsrd r3, v2 +; CHECK-LE-NEXT: rldicl r3, r3, 16, 48 +; CHECK-LE-NEXT: extsh r3, r3 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <8 x i16> %vss, i32 7 ret i16 %vecext -; CHECK-LABEL: @getss7 -; CHECK: mffprd r3, f0 -; CHECK: clrldi r3, r3, 48 -; CHECK: extsh r3, r3 -; CHECK-LE-LABEL: @getss7 -; CHECK-LE: mfvsrd r3, v2 -; CHECK-LE: rldicl r3, r3, 16, 48 -; CHECK-LE: extsh r3, r3 } ; Function Attrs: norecurse nounwind readnone define zeroext i16 @getus0(<8 x i16> %vus) { +; CHECK-LABEL: getus0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mfvsrd r3, v2 +; CHECK-NEXT: rldicl r3, r3, 16, 48 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getus0: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxswapd vs0, v2 +; CHECK-LE-NEXT: mffprd r3, f0 +; CHECK-LE-NEXT: clrldi r3, r3, 48 +; CHECK-LE-NEXT: clrldi r3, r3, 48 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <8 x i16> %vus, i32 0 ret i16 %vecext -; CHECK-LABEL: @getus0 -; CHECK: mfvsrd r3, v2 -; CHECK: rldicl r3, r3, 16, 48 -; CHECK-LE-LABEL: @getus0 -; CHECK-LE: mffprd r3, f0 -; CHECK-LE: clrldi r3, r3, 48 } ; Function Attrs: norecurse nounwind readnone define zeroext i16 @getus1(<8 x i16> %vus) { +; CHECK-LABEL: getus1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mfvsrd r3, v2 +; CHECK-NEXT: rldicl r3, r3, 32, 48 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getus1: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxswapd vs0, v2 +; CHECK-LE-NEXT: mffprd r3, f0 +; CHECK-LE-NEXT: 
rldicl r3, r3, 48, 48 +; CHECK-LE-NEXT: clrldi r3, r3, 48 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <8 x i16> %vus, i32 1 ret i16 %vecext -; CHECK-LABEL: @getus1 -; CHECK: mfvsrd r3, v2 -; CHECK: rldicl r3, r3, 32, 48 -; CHECK-LE-LABEL: @getus1 -; CHECK-LE: mffprd r3, f0 -; CHECK-LE: rldicl r3, r3, 48, 48 } ; Function Attrs: norecurse nounwind readnone define zeroext i16 @getus2(<8 x i16> %vus) { +; CHECK-LABEL: getus2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mfvsrd r3, v2 +; CHECK-NEXT: rldicl r3, r3, 48, 48 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getus2: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxswapd vs0, v2 +; CHECK-LE-NEXT: mffprd r3, f0 +; CHECK-LE-NEXT: rldicl r3, r3, 32, 48 +; CHECK-LE-NEXT: clrldi r3, r3, 48 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <8 x i16> %vus, i32 2 ret i16 %vecext -; CHECK-LABEL: @getus2 -; CHECK: mfvsrd r3, v2 -; CHECK: rldicl r3, r3, 48, 48 -; CHECK-LE-LABEL: @getus2 -; CHECK-LE: mffprd r3, f0 -; CHECK-LE: rldicl r3, r3, 32, 48 } ; Function Attrs: norecurse nounwind readnone define zeroext i16 @getus3(<8 x i16> %vus) { +; CHECK-LABEL: getus3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mfvsrd r3, v2 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getus3: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxswapd vs0, v2 +; CHECK-LE-NEXT: mffprd r3, f0 +; CHECK-LE-NEXT: rldicl r3, r3, 16, 48 +; CHECK-LE-NEXT: clrldi r3, r3, 48 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <8 x i16> %vus, i32 3 ret i16 %vecext -; CHECK-LABEL: @getus3 -; CHECK: mfvsrd r3, v2 -; CHECK: clrldi r3, r3, 48 -; CHECK-LE-LABEL: @getus3 -; CHECK-LE: mffprd r3, f0 -; CHECK-LE: rldicl r3, r3, 16, 48 } ; Function Attrs: norecurse nounwind readnone define zeroext i16 @getus4(<8 x i16> %vus) { +; CHECK-LABEL: getus4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd vs0, v2 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: rldicl r3, r3, 
16, 48 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getus4: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mfvsrd r3, v2 +; CHECK-LE-NEXT: clrldi r3, r3, 48 +; CHECK-LE-NEXT: clrldi r3, r3, 48 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <8 x i16> %vus, i32 4 ret i16 %vecext -; CHECK-LABEL: @getus4 -; CHECK: mffprd r3, f0 -; CHECK: rldicl r3, r3, 16, 48 -; CHECK-LE-LABEL: @getus4 -; CHECK-LE: mfvsrd r3, v2 -; CHECK-LE: clrldi r3, r3, 48 } ; Function Attrs: norecurse nounwind readnone define zeroext i16 @getus5(<8 x i16> %vus) { +; CHECK-LABEL: getus5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd vs0, v2 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: rldicl r3, r3, 32, 48 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getus5: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mfvsrd r3, v2 +; CHECK-LE-NEXT: rldicl r3, r3, 48, 48 +; CHECK-LE-NEXT: clrldi r3, r3, 48 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <8 x i16> %vus, i32 5 ret i16 %vecext -; CHECK-LABEL: @getus5 -; CHECK: mffprd r3, f0 -; CHECK: rldicl r3, r3, 32, 48 -; CHECK-LE-LABEL: @getus5 -; CHECK-LE: mfvsrd r3, v2 -; CHECK-LE: rldicl r3, r3, 48, 48 } ; Function Attrs: norecurse nounwind readnone define zeroext i16 @getus6(<8 x i16> %vus) { +; CHECK-LABEL: getus6: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd vs0, v2 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: rldicl r3, r3, 48, 48 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getus6: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mfvsrd r3, v2 +; CHECK-LE-NEXT: rldicl r3, r3, 32, 48 +; CHECK-LE-NEXT: clrldi r3, r3, 48 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <8 x i16> %vus, i32 6 ret i16 %vecext -; CHECK-LABEL: @getus6 -; CHECK: mffprd r3, f0 -; CHECK: rldicl r3, r3, 48, 48 -; CHECK-LE-LABEL: @getus6 -; CHECK-LE: mfvsrd r3, v2 -; CHECK-LE: rldicl r3, r3, 32, 48 } ; Function Attrs: norecurse nounwind readnone define zeroext i16 
@getus7(<8 x i16> %vus) { +; CHECK-LABEL: getus7: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd vs0, v2 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getus7: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mfvsrd r3, v2 +; CHECK-LE-NEXT: rldicl r3, r3, 16, 48 +; CHECK-LE-NEXT: clrldi r3, r3, 48 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <8 x i16> %vus, i32 7 ret i16 %vecext -; CHECK-LABEL: @getus7 -; CHECK: mffprd r3, f0 -; CHECK: clrldi r3, r3, 48 -; CHECK-LE-LABEL: @getus7 -; CHECK-LE: mfvsrd r3, v2 -; CHECK-LE: rldicl r3, r3, 16, 48 } ; Function Attrs: norecurse nounwind readnone define signext i16 @getvelss(<8 x i16> %vss, i32 signext %i) { -; CHECK-LABEL: @getvelss -; CHECK: andi. r4, r5, 4 -; CHECK: li r3, 3 -; CHECK: sldi r4, r4, 1 -; CHECK: andc r3, r3, r5 -; CHECK: lvsl v3, 0, r4 -; CHECK: sldi r3, r3, 4 -; CHECK: vperm v2, v2, v2, v3 -; CHECK: mfvsrd r4, v2 -; CHECK: srd r3, r4, r3 -; CHECK: extsh r3, r3 -; CHECK-LE-LABEL: @getvelss -; CHECK-LE: li r3, 4 -; CHECK-LE: andc r3, r3, r5 -; CHECK-LE: sldi r3, r3, 1 -; CHECK-LE: lvsl v3, 0, r3 -; CHECK-LE: li r3, 3 -; CHECK-LE: and r3, r3, r5 -; CHECK-LE: vperm v2, v2, v2, v3 -; CHECK-LE: sldi r3, r3, 4 -; CHECK-LE: mfvsrd r4, v2 -; CHECK-LE: srd r3, r4, r3 -; CHECK-LE: extsh r3, r3 +; CHECK-LABEL: getvelss: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: andi. 
r4, r5, 4 +; CHECK-NEXT: li r3, 3 +; CHECK-NEXT: sldi r4, r4, 1 +; CHECK-NEXT: andc r3, r3, r5 +; CHECK-NEXT: lvsl v3, 0, r4 +; CHECK-NEXT: sldi r3, r3, 4 +; CHECK-NEXT: vperm v2, v2, v2, v3 +; CHECK-NEXT: mfvsrd r4, v2 +; CHECK-NEXT: srd r3, r4, r3 +; CHECK-NEXT: extsh r3, r3 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getvelss: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: li r3, 4 +; CHECK-LE-NEXT: andc r3, r3, r5 +; CHECK-LE-NEXT: sldi r3, r3, 1 +; CHECK-LE-NEXT: lvsl v3, 0, r3 +; CHECK-LE-NEXT: li r3, 3 +; CHECK-LE-NEXT: and r3, r3, r5 +; CHECK-LE-NEXT: vperm v2, v2, v2, v3 +; CHECK-LE-NEXT: sldi r3, r3, 4 +; CHECK-LE-NEXT: mfvsrd r4, v2 +; CHECK-LE-NEXT: srd r3, r4, r3 +; CHECK-LE-NEXT: extsh r3, r3 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <8 x i16> %vss, i32 %i ret i16 %vecext @@ -857,29 +1242,34 @@ ; Function Attrs: norecurse nounwind readnone define zeroext i16 @getvelus(<8 x i16> %vus, i32 signext %i) { -; CHECK-LABEL: @getvelus -; CHECK: andi. r4, r5, 4 -; CHECK: li r3, 3 -; CHECK: sldi r4, r4, 1 -; CHECK: andc r3, r3, r5 -; CHECK: lvsl v3, 0, r4 -; CHECK: sldi r3, r3, 4 -; CHECK: vperm v2, v2, v2, v3 -; CHECK: mfvsrd r4, v2 -; CHECK: srd r3, r4, r3 -; CHECK: clrldi r3, r3, 48 -; CHECK-LE-LABEL: @getvelus -; CHECK-LE: li r3, 4 -; CHECK-LE: andc r3, r3, r5 -; CHECK-LE: sldi r3, r3, 1 -; CHECK-LE: lvsl v3, 0, r3 -; CHECK-LE: li r3, 3 -; CHECK-LE: and r3, r3, r5 -; CHECK-LE: vperm v2, v2, v2, v3 -; CHECK-LE: sldi r3, r3, 4 -; CHECK-LE: mfvsrd r4, v2 -; CHECK-LE: srd r3, r4, r3 -; CHECK-LE: clrldi r3, r3, 48 +; CHECK-LABEL: getvelus: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: andi. 
r4, r5, 4 +; CHECK-NEXT: li r3, 3 +; CHECK-NEXT: sldi r4, r4, 1 +; CHECK-NEXT: andc r3, r3, r5 +; CHECK-NEXT: lvsl v3, 0, r4 +; CHECK-NEXT: sldi r3, r3, 4 +; CHECK-NEXT: vperm v2, v2, v2, v3 +; CHECK-NEXT: mfvsrd r4, v2 +; CHECK-NEXT: srd r3, r4, r3 +; CHECK-NEXT: clrldi r3, r3, 48 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getvelus: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: li r3, 4 +; CHECK-LE-NEXT: andc r3, r3, r5 +; CHECK-LE-NEXT: sldi r3, r3, 1 +; CHECK-LE-NEXT: lvsl v3, 0, r3 +; CHECK-LE-NEXT: li r3, 3 +; CHECK-LE-NEXT: and r3, r3, r5 +; CHECK-LE-NEXT: vperm v2, v2, v2, v3 +; CHECK-LE-NEXT: sldi r3, r3, 4 +; CHECK-LE-NEXT: mfvsrd r4, v2 +; CHECK-LE-NEXT: srd r3, r4, r3 +; CHECK-LE-NEXT: clrldi r3, r3, 48 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <8 x i16> %vus, i32 %i ret i16 %vecext @@ -887,288 +1277,504 @@ ; Function Attrs: norecurse nounwind readnone define signext i32 @getsi0(<4 x i32> %vsi) { +; CHECK-LABEL: getsi0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getsi0: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxswapd vs0, v2 +; CHECK-LE-NEXT: mffprwz r3, f0 +; CHECK-LE-NEXT: extsw r3, r3 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <4 x i32> %vsi, i32 0 ret i32 %vecext -; CHECK-LABEL: @getsi0 -; CHECK: xxsldwi vs0, v2, v2, 3 -; CHECK: mffprwz r3, f0 -; CHECK: extsw r3, r3 -; CHECK-LE-LABEL: @getsi0 -; CHECK-LE: xxswapd vs0, v2 -; CHECK-LE: mffprwz r3, f0 -; CHECK-LE: extsw r3, r3 } ; Function Attrs: norecurse nounwind readnone define signext i32 @getsi1(<4 x i32> %vsi) { +; CHECK-LABEL: getsi1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mfvsrwz r3, v2 +; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getsi1: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxsldwi vs0, v2, v2, 1 +; CHECK-LE-NEXT: mffprwz r3, f0 +; CHECK-LE-NEXT: extsw r3, r3 +; CHECK-LE-NEXT: blr entry: %vecext = 
extractelement <4 x i32> %vsi, i32 1 ret i32 %vecext -; CHECK-LABEL: @getsi1 -; CHECK: mfvsrwz r3, v2 -; CHECK: extsw r3, r3 -; CHECK-LE-LABEL: @getsi1 -; CHECK-LE: xxsldwi vs0, v2, v2, 1 -; CHECK-LE: mffprwz r3, f0 -; CHECK-LE: extsw r3, r3 } ; Function Attrs: norecurse nounwind readnone define signext i32 @getsi2(<4 x i32> %vsi) { +; CHECK-LABEL: getsi2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxsldwi vs0, v2, v2, 1 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getsi2: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mfvsrwz r3, v2 +; CHECK-LE-NEXT: extsw r3, r3 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <4 x i32> %vsi, i32 2 ret i32 %vecext -; CHECK-LABEL: @getsi2 -; CHECK: xxsldwi vs0, v2, v2, 1 -; CHECK: mffprwz r3, f0 -; CHECK: extsw r3, r3 -; CHECK-LE-LABEL: @getsi2 -; CHECK-LE: mfvsrwz r3, v2 -; CHECK-LE: extsw r3, r3 } ; Function Attrs: norecurse nounwind readnone define signext i32 @getsi3(<4 x i32> %vsi) { +; CHECK-LABEL: getsi3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd vs0, v2 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getsi3: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-LE-NEXT: mffprwz r3, f0 +; CHECK-LE-NEXT: extsw r3, r3 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <4 x i32> %vsi, i32 3 ret i32 %vecext -; CHECK-LABEL: @getsi3 -; CHECK: xxswapd vs0, v2 -; CHECK: mffprwz r3, f0 -; CHECK: extsw r3, r3 -; CHECK-LE-LABEL: @getsi3 -; CHECK-LE: xxsldwi vs0, v2, v2, 3 -; CHECK-LE: mffprwz r3, f0 -; CHECK-LE: extsw r3, r3 } ; Function Attrs: norecurse nounwind readnone define zeroext i32 @getui0(<4 x i32> %vui) { +; CHECK-LABEL: getui0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getui0: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxswapd vs0, v2 +; 
CHECK-LE-NEXT: mffprwz r3, f0 +; CHECK-LE-NEXT: clrldi r3, r3, 32 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <4 x i32> %vui, i32 0 ret i32 %vecext -; CHECK-LABEL: @getui0 -; CHECK: xxsldwi vs0, v2, v2, 3 -; CHECK: mffprwz r3, f0 -; CHECK-LE-LABEL: @getui0 -; CHECK-LE: xxswapd vs0, v2 -; CHECK-LE: mffprwz r3, f0 } ; Function Attrs: norecurse nounwind readnone define zeroext i32 @getui1(<4 x i32> %vui) { +; CHECK-LABEL: getui1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mfvsrwz r3, v2 +; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getui1: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxsldwi vs0, v2, v2, 1 +; CHECK-LE-NEXT: mffprwz r3, f0 +; CHECK-LE-NEXT: clrldi r3, r3, 32 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <4 x i32> %vui, i32 1 ret i32 %vecext -; CHECK-LABEL: @getui1 -; CHECK: mfvsrwz r3, v2 -; CHECK-LE-LABEL: @getui1 -; CHECK-LE: xxsldwi vs0, v2, v2, 1 -; CHECK-LE: mffprwz r3, f0 } ; Function Attrs: norecurse nounwind readnone define zeroext i32 @getui2(<4 x i32> %vui) { +; CHECK-LABEL: getui2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxsldwi vs0, v2, v2, 1 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getui2: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mfvsrwz r3, v2 +; CHECK-LE-NEXT: clrldi r3, r3, 32 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <4 x i32> %vui, i32 2 ret i32 %vecext -; CHECK-LABEL: @getui2 -; CHECK: xxsldwi vs0, v2, v2, 1 -; CHECK: mffprwz r3, f0 -; CHECK-LE-LABEL: @getui2 -; CHECK-LE: mfvsrwz r3, v2 } ; Function Attrs: norecurse nounwind readnone define zeroext i32 @getui3(<4 x i32> %vui) { +; CHECK-LABEL: getui3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd vs0, v2 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getui3: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-LE-NEXT: mffprwz r3, f0 +; CHECK-LE-NEXT: clrldi r3, 
r3, 32 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <4 x i32> %vui, i32 3 ret i32 %vecext -; CHECK-LABEL: @getui3 -; CHECK: xxswapd vs0, v2 -; CHECK: mffprwz r3, f0 -; CHECK-LE-LABEL: @getui3 -; CHECK-LE: xxsldwi vs0, v2, v2, 3 -; CHECK-LE: mffprwz r3, f0 } ; Function Attrs: norecurse nounwind readnone define signext i32 @getvelsi(<4 x i32> %vsi, i32 signext %i) { +; CHECK-LABEL: getvelsi: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: andi. r4, r5, 2 +; CHECK-NEXT: li r3, 1 +; CHECK-NEXT: sldi r4, r4, 2 +; CHECK-NEXT: andc r3, r3, r5 +; CHECK-NEXT: lvsl v3, 0, r4 +; CHECK-NEXT: sldi r3, r3, 5 +; CHECK-NEXT: vperm v2, v2, v2, v3 +; CHECK-NEXT: mfvsrd r4, v2 +; CHECK-NEXT: srd r3, r4, r3 +; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getvelsi: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: li r3, 2 +; CHECK-LE-NEXT: andc r3, r3, r5 +; CHECK-LE-NEXT: sldi r3, r3, 2 +; CHECK-LE-NEXT: lvsl v3, 0, r3 +; CHECK-LE-NEXT: li r3, 1 +; CHECK-LE-NEXT: and r3, r3, r5 +; CHECK-LE-NEXT: vperm v2, v2, v2, v3 +; CHECK-LE-NEXT: sldi r3, r3, 5 +; CHECK-LE-NEXT: mfvsrd r4, v2 +; CHECK-LE-NEXT: srd r3, r4, r3 +; CHECK-LE-NEXT: extsw r3, r3 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <4 x i32> %vsi, i32 %i ret i32 %vecext -; CHECK-LABEL: @getvelsi -; CHECK-LE-LABEL: @getvelsi ; FIXME: add check patterns when variable element extraction is implemented } ; Function Attrs: norecurse nounwind readnone define zeroext i32 @getvelui(<4 x i32> %vui, i32 signext %i) { +; CHECK-LABEL: getvelui: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: andi. 
r4, r5, 2 +; CHECK-NEXT: li r3, 1 +; CHECK-NEXT: sldi r4, r4, 2 +; CHECK-NEXT: andc r3, r3, r5 +; CHECK-NEXT: lvsl v3, 0, r4 +; CHECK-NEXT: sldi r3, r3, 5 +; CHECK-NEXT: vperm v2, v2, v2, v3 +; CHECK-NEXT: mfvsrd r4, v2 +; CHECK-NEXT: srd r3, r4, r3 +; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getvelui: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: li r3, 2 +; CHECK-LE-NEXT: andc r3, r3, r5 +; CHECK-LE-NEXT: sldi r3, r3, 2 +; CHECK-LE-NEXT: lvsl v3, 0, r3 +; CHECK-LE-NEXT: li r3, 1 +; CHECK-LE-NEXT: and r3, r3, r5 +; CHECK-LE-NEXT: vperm v2, v2, v2, v3 +; CHECK-LE-NEXT: sldi r3, r3, 5 +; CHECK-LE-NEXT: mfvsrd r4, v2 +; CHECK-LE-NEXT: srd r3, r4, r3 +; CHECK-LE-NEXT: clrldi r3, r3, 32 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <4 x i32> %vui, i32 %i ret i32 %vecext -; CHECK-LABEL: @getvelui -; CHECK-LE-LABEL: @getvelui ; FIXME: add check patterns when variable element extraction is implemented } ; Function Attrs: norecurse nounwind readnone define i64 @getsl0(<2 x i64> %vsl) { +; CHECK-LABEL: getsl0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mfvsrd r3, v2 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getsl0: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxswapd vs0, v2 +; CHECK-LE-NEXT: mffprd r3, f0 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <2 x i64> %vsl, i32 0 ret i64 %vecext -; CHECK-LABEL: @getsl0 -; CHECK: mfvsrd r3, v2 -; CHECK-LE-LABEL: @getsl0 -; CHECK-LE: xxswapd vs0, v2 -; CHECK-LE: mffprd r3, f0 } ; Function Attrs: norecurse nounwind readnone define i64 @getsl1(<2 x i64> %vsl) { +; CHECK-LABEL: getsl1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd vs0, v2 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getsl1: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mfvsrd r3, v2 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <2 x i64> %vsl, i32 1 ret i64 %vecext -; CHECK-LABEL: @getsl1 -; CHECK: xxswapd vs0, v2 -; CHECK: mffprd r3, f0 -; CHECK-LE-LABEL: @getsl1 -; CHECK-LE: 
mfvsrd r3, v2 } ; Function Attrs: norecurse nounwind readnone define i64 @getul0(<2 x i64> %vul) { +; CHECK-LABEL: getul0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mfvsrd r3, v2 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getul0: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxswapd vs0, v2 +; CHECK-LE-NEXT: mffprd r3, f0 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <2 x i64> %vul, i32 0 ret i64 %vecext -; CHECK-LABEL: @getul0 -; CHECK: mfvsrd r3, v2 -; CHECK-LE-LABEL: @getul0 -; CHECK-LE: xxswapd vs0, v2 -; CHECK-LE: mffprd r3, f0 } ; Function Attrs: norecurse nounwind readnone define i64 @getul1(<2 x i64> %vul) { +; CHECK-LABEL: getul1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd vs0, v2 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getul1: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mfvsrd r3, v2 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <2 x i64> %vul, i32 1 ret i64 %vecext -; CHECK-LABEL: @getul1 -; CHECK: xxswapd vs0, v2 -; CHECK: mffprd r3, f0 -; CHECK-LE-LABEL: @getul1 -; CHECK-LE: mfvsrd r3, v2 } ; Function Attrs: norecurse nounwind readnone define i64 @getvelsl(<2 x i64> %vsl, i32 signext %i) { +; CHECK-LABEL: getvelsl: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: andi. 
r3, r5, 1 +; CHECK-NEXT: sldi r3, r3, 3 +; CHECK-NEXT: lvsl v3, 0, r3 +; CHECK-NEXT: vperm v2, v2, v2, v3 +; CHECK-NEXT: mfvsrd r3, v2 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getvelsl: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: li r3, 1 +; CHECK-LE-NEXT: andc r3, r3, r5 +; CHECK-LE-NEXT: sldi r3, r3, 3 +; CHECK-LE-NEXT: lvsl v3, 0, r3 +; CHECK-LE-NEXT: vperm v2, v2, v2, v3 +; CHECK-LE-NEXT: mfvsrd r3, v2 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <2 x i64> %vsl, i32 %i ret i64 %vecext -; CHECK-LABEL: @getvelsl -; CHECK-LE-LABEL: @getvelsl ; FIXME: add check patterns when variable element extraction is implemented } ; Function Attrs: norecurse nounwind readnone define i64 @getvelul(<2 x i64> %vul, i32 signext %i) { +; CHECK-LABEL: getvelul: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: andi. r3, r5, 1 +; CHECK-NEXT: sldi r3, r3, 3 +; CHECK-NEXT: lvsl v3, 0, r3 +; CHECK-NEXT: vperm v2, v2, v2, v3 +; CHECK-NEXT: mfvsrd r3, v2 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getvelul: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: li r3, 1 +; CHECK-LE-NEXT: andc r3, r3, r5 +; CHECK-LE-NEXT: sldi r3, r3, 3 +; CHECK-LE-NEXT: lvsl v3, 0, r3 +; CHECK-LE-NEXT: vperm v2, v2, v2, v3 +; CHECK-LE-NEXT: mfvsrd r3, v2 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <2 x i64> %vul, i32 %i ret i64 %vecext -; CHECK-LABEL: @getvelul -; CHECK-LE-LABEL: @getvelul ; FIXME: add check patterns when variable element extraction is implemented } ; Function Attrs: norecurse nounwind readnone define float @getf0(<4 x float> %vf) { +; CHECK-LABEL: getf0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvspdpn f1, v2 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getf0: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-LE-NEXT: xscvspdpn f1, vs0 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <4 x float> %vf, i32 0 ret float %vecext -; CHECK-LABEL: @getf0 -; CHECK: xscvspdpn f1, v2 -; CHECK-LE-LABEL: @getf0 -; CHECK-LE: xxsldwi vs0, v2, v2, 3 -; 
CHECK-LE: xscvspdpn f1, vs0 } ; Function Attrs: norecurse nounwind readnone define float @getf1(<4 x float> %vf) { +; CHECK-LABEL: getf1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxsldwi vs0, v2, v2, 1 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getf1: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxswapd vs0, v2 +; CHECK-LE-NEXT: xscvspdpn f1, vs0 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <4 x float> %vf, i32 1 ret float %vecext -; CHECK-LABEL: @getf1 -; CHECK: xxsldwi vs0, v2, v2, 1 -; CHECK: xscvspdpn f1, vs0 -; CHECK-LE-LABEL: @getf1 -; CHECK-LE: xxswapd vs0, v2 -; CHECK-LE: xscvspdpn f1, vs0 } ; Function Attrs: norecurse nounwind readnone define float @getf2(<4 x float> %vf) { +; CHECK-LABEL: getf2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd vs0, v2 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getf2: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxsldwi vs0, v2, v2, 1 +; CHECK-LE-NEXT: xscvspdpn f1, vs0 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <4 x float> %vf, i32 2 ret float %vecext -; CHECK-LABEL: @getf2 -; CHECK: xxswapd vs0, v2 -; CHECK: xscvspdpn f1, vs0 -; CHECK-LE-LABEL: @getf2 -; CHECK-LE: xxsldwi vs0, v2, v2, 1 -; CHECK-LE: xscvspdpn f1, vs0 } ; Function Attrs: norecurse nounwind readnone define float @getf3(<4 x float> %vf) { +; CHECK-LABEL: getf3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getf3: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xscvspdpn f1, v2 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <4 x float> %vf, i32 3 ret float %vecext -; CHECK-LABEL: @getf3 -; CHECK: xxsldwi vs0, v2, v2, 3 -; CHECK: xscvspdpn f1, vs0 -; CHECK-LE-LABEL: @getf3 -; CHECK-LE: xscvspdpn f1, v2 } ; Function Attrs: norecurse nounwind readnone define float @getvelf(<4 x float> %vf, i32 signext %i) { +; CHECK-LABEL: getvelf: +; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: sldi r3, r5, 2 +; CHECK-NEXT: lvsl v3, 0, r3 +; CHECK-NEXT: vperm v2, v2, v2, v3 +; CHECK-NEXT: xscvspdpn f1, v2 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getvelf: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xori r3, r5, 3 +; CHECK-LE-NEXT: sldi r3, r3, 2 +; CHECK-LE-NEXT: lvsl v3, 0, r3 +; CHECK-LE-NEXT: vperm v2, v2, v2, v3 +; CHECK-LE-NEXT: xscvspdpn f1, v2 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <4 x float> %vf, i32 %i ret float %vecext -; CHECK-LABEL: @getvelf -; CHECK-LE-LABEL: @getvelf ; FIXME: add check patterns when variable element extraction is implemented } ; Function Attrs: norecurse nounwind readnone define double @getd0(<2 x double> %vd) { +; CHECK-LABEL: getd0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlor f1, v2, v2 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getd0: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxswapd vs1, v2 +; CHECK-LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <2 x double> %vd, i32 0 ret double %vecext -; CHECK-LABEL: @getd0 -; CHECK: xxlor f1, v2, v2 -; CHECK-LE-LABEL: @getd0 -; CHECK-LE: xxswapd vs1, v2 } ; Function Attrs: norecurse nounwind readnone define double @getd1(<2 x double> %vd) { +; CHECK-LABEL: getd1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd vs1, v2 +; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getd1: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxlor f1, v2, v2 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <2 x double> %vd, i32 1 ret double %vecext -; CHECK-LABEL: @getd1 -; CHECK: xxswapd vs1, v2 -; CHECK-LE-LABEL: @getd1 -; CHECK-LE: xxlor f1, v2, v2 } ; Function Attrs: norecurse nounwind readnone define double @getveld(<2 x double> %vd, i32 signext %i) { +; CHECK-LABEL: getveld: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: andi. 
r3, r5, 1 +; CHECK-NEXT: sldi r3, r3, 3 +; CHECK-NEXT: lvsl v3, 0, r3 +; CHECK-NEXT: vperm v2, v2, v2, v3 +; CHECK-NEXT: xxlor vs1, v2, v2 +; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; CHECK-NEXT: blr +; +; CHECK-LE-LABEL: getveld: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: li r3, 1 +; CHECK-LE-NEXT: andc r3, r3, r5 +; CHECK-LE-NEXT: sldi r3, r3, 3 +; CHECK-LE-NEXT: lvsl v3, 0, r3 +; CHECK-LE-NEXT: vperm v2, v2, v2, v3 +; CHECK-LE-NEXT: xxlor vs1, v2, v2 +; CHECK-LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; CHECK-LE-NEXT: blr entry: %vecext = extractelement <2 x double> %vd, i32 %i ret double %vecext -; CHECK-LABEL: @getveld -; CHECK-LE-LABEL: @getveld ; FIXME: add check patterns when variable element extraction is implemented } diff --git a/llvm/test/CodeGen/PowerPC/pr25080.ll b/llvm/test/CodeGen/PowerPC/pr25080.ll --- a/llvm/test/CodeGen/PowerPC/pr25080.ll +++ b/llvm/test/CodeGen/PowerPC/pr25080.ll @@ -64,42 +64,37 @@ ; BE-NEXT: xxswapd 0, 35 ; BE-NEXT: mfvsrwz 3, 35 ; BE-NEXT: xxsldwi 1, 35, 35, 1 -; BE-NEXT: sldi 3, 3, 48 -; BE-NEXT: mffprwz 4, 0 -; BE-NEXT: xxsldwi 0, 35, 35, 3 -; BE-NEXT: mtvsrd 36, 3 -; BE-NEXT: mffprwz 3, 1 -; BE-NEXT: sldi 4, 4, 48 -; BE-NEXT: xxswapd 1, 34 -; BE-NEXT: mtvsrd 35, 4 ; BE-NEXT: mfvsrwz 4, 34 -; BE-NEXT: sldi 3, 3, 48 -; BE-NEXT: mtvsrd 37, 3 -; BE-NEXT: mffprwz 3, 0 -; BE-NEXT: sldi 4, 4, 48 -; BE-NEXT: xxsldwi 0, 34, 34, 1 -; BE-NEXT: vmrghh 3, 5, 3 -; BE-NEXT: mtvsrd 37, 4 -; BE-NEXT: sldi 3, 3, 48 -; BE-NEXT: mffprwz 4, 1 -; BE-NEXT: xxsldwi 1, 34, 34, 3 -; BE-NEXT: mtvsrd 34, 3 +; BE-NEXT: mtvsrwz 36, 3 +; BE-NEXT: xxsldwi 2, 35, 35, 3 ; BE-NEXT: mffprwz 3, 0 -; BE-NEXT: sldi 4, 4, 48 -; BE-NEXT: mtvsrd 32, 4 +; BE-NEXT: xxswapd 0, 34 +; BE-NEXT: mtvsrwz 35, 4 ; BE-NEXT: mffprwz 4, 1 -; BE-NEXT: sldi 3, 3, 48 -; BE-NEXT: mtvsrd 33, 3 -; BE-NEXT: sldi 3, 4, 48 -; BE-NEXT: vmrghh 2, 2, 4 -; BE-NEXT: mtvsrd 36, 3 +; BE-NEXT: xxsldwi 1, 34, 34, 1 +; BE-NEXT: mtvsrwz 37, 3 ; BE-NEXT: addis 3, 2, 
.LCPI0_1@toc@ha -; BE-NEXT: vmrghh 0, 1, 0 ; BE-NEXT: addi 3, 3, .LCPI0_1@toc@l -; BE-NEXT: vmrghh 4, 4, 5 +; BE-NEXT: mtvsrwz 32, 4 +; BE-NEXT: mffprwz 4, 0 +; BE-NEXT: lxvw4x 33, 0, 3 +; BE-NEXT: xxsldwi 0, 34, 34, 3 +; BE-NEXT: mffprwz 3, 1 +; BE-NEXT: mffprwz 5, 2 +; BE-NEXT: vperm 2, 0, 5, 1 +; BE-NEXT: mtvsrwz 37, 3 +; BE-NEXT: mffprwz 3, 0 +; BE-NEXT: mtvsrwz 38, 5 +; BE-NEXT: mtvsrwz 39, 4 +; BE-NEXT: mtvsrwz 32, 3 +; BE-NEXT: addis 3, 2, .LCPI0_2@toc@ha +; BE-NEXT: vperm 4, 6, 4, 1 +; BE-NEXT: addi 3, 3, .LCPI0_2@toc@l +; BE-NEXT: vperm 5, 5, 7, 1 ; BE-NEXT: lxvw4x 0, 0, 3 -; BE-NEXT: vmrghw 2, 2, 3 -; BE-NEXT: vmrghw 3, 4, 0 +; BE-NEXT: vperm 3, 0, 3, 1 +; BE-NEXT: vmrghw 2, 4, 2 +; BE-NEXT: vmrghw 3, 3, 5 ; BE-NEXT: xxmrghd 34, 35, 34 ; BE-NEXT: vspltish 3, 15 ; BE-NEXT: xxlor 34, 34, 0 diff --git a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll --- a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll +++ b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll @@ -297,16 +297,14 @@ ; P9BE-LABEL: test32: ; P9BE: # %bb.0: # %entry ; P9BE-NEXT: add r5, r3, r4 -; P9BE-NEXT: lfiwzx f0, r3, r4 +; P9BE-NEXT: lxsiwzx v2, r3, r4 ; P9BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha ; P9BE-NEXT: xxlxor v3, v3, v3 -; P9BE-NEXT: xxsldwi v2, f0, f0, 1 ; P9BE-NEXT: addi r3, r3, .LCPI2_0@toc@l ; P9BE-NEXT: lxvx v4, 0, r3 ; P9BE-NEXT: li r3, 4 -; P9BE-NEXT: lfiwzx f0, r5, r3 +; P9BE-NEXT: lxsiwzx v5, r5, r3 ; P9BE-NEXT: vperm v2, v3, v2, v4 -; P9BE-NEXT: xxsldwi v5, f0, f0, 1 ; P9BE-NEXT: vperm v3, v3, v5, v4 ; P9BE-NEXT: vspltisw v4, 8 ; P9BE-NEXT: vnegw v3, v3 @@ -373,19 +371,21 @@ ; P9BE-NEXT: sldi r4, r4, 1 ; P9BE-NEXT: li r7, 16 ; P9BE-NEXT: add r6, r3, r4 -; P9BE-NEXT: lxsihzx v4, r3, r4 -; P9BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha +; P9BE-NEXT: lxsihzx v5, r3, r4 +; P9BE-NEXT: addis r3, r2, .LCPI3_1@toc@ha ; P9BE-NEXT: lxsihzx v2, r6, r7 +; P9BE-NEXT: addis r6, r2, .LCPI3_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI3_1@toc@l +; P9BE-NEXT: addi r6, 
r6, .LCPI3_0@toc@l +; P9BE-NEXT: vsplth v5, v5, 3 +; P9BE-NEXT: lxvx v3, 0, r6 ; P9BE-NEXT: li r6, 0 -; P9BE-NEXT: addi r3, r3, .LCPI3_0@toc@l -; P9BE-NEXT: sldi r6, r6, 48 -; P9BE-NEXT: vsplth v4, v4, 3 -; P9BE-NEXT: mtvsrd v3, r6 ; P9BE-NEXT: vsplth v2, v2, 3 -; P9BE-NEXT: vmrghh v4, v3, v4 -; P9BE-NEXT: vmrghh v2, v3, v2 -; P9BE-NEXT: vsplth v3, v3, 0 -; P9BE-NEXT: vmrghw v3, v3, v4 +; P9BE-NEXT: mtvsrwz v4, r6 +; P9BE-NEXT: vperm v2, v4, v2, v3 +; P9BE-NEXT: vperm v3, v4, v5, v3 +; P9BE-NEXT: vsplth v4, v4, 3 +; P9BE-NEXT: vmrghw v3, v4, v3 ; P9BE-NEXT: lxvx v4, 0, r3 ; P9BE-NEXT: li r3, 0 ; P9BE-NEXT: vperm v2, v3, v2, v4 @@ -461,24 +461,26 @@ ; P9BE: # %bb.0: # %entry ; P9BE-NEXT: add r6, r3, r4 ; P9BE-NEXT: li r7, 8 -; P9BE-NEXT: lxsibzx v4, r3, r4 -; P9BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha +; P9BE-NEXT: lxsibzx v5, r3, r4 +; P9BE-NEXT: addis r3, r2, .LCPI4_1@toc@ha ; P9BE-NEXT: lxsibzx v2, r6, r7 +; P9BE-NEXT: addis r6, r2, .LCPI4_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI4_1@toc@l +; P9BE-NEXT: addi r6, r6, .LCPI4_0@toc@l +; P9BE-NEXT: vspltb v5, v5, 7 +; P9BE-NEXT: lxvx v3, 0, r6 ; P9BE-NEXT: li r6, 0 -; P9BE-NEXT: addi r3, r3, .LCPI4_0@toc@l -; P9BE-NEXT: sldi r6, r6, 56 -; P9BE-NEXT: vspltb v4, v4, 7 -; P9BE-NEXT: mtvsrd v3, r6 ; P9BE-NEXT: vspltb v2, v2, 7 -; P9BE-NEXT: vmrghb v4, v3, v4 -; P9BE-NEXT: vmrghb v2, v3, v2 -; P9BE-NEXT: vspltb v3, v3, 0 -; P9BE-NEXT: vmrghh v4, v4, v3 -; P9BE-NEXT: xxspltw v3, v3, 0 -; P9BE-NEXT: vmrghw v2, v4, v2 -; P9BE-NEXT: lxvx v4, 0, r3 +; P9BE-NEXT: mtvsrwz v4, r6 +; P9BE-NEXT: vperm v2, v4, v2, v3 +; P9BE-NEXT: vperm v3, v4, v5, v3 +; P9BE-NEXT: vspltb v4, v4, 7 +; P9BE-NEXT: vmrghh v3, v3, v4 +; P9BE-NEXT: xxspltw v4, v4, 0 +; P9BE-NEXT: vmrghw v2, v3, v2 +; P9BE-NEXT: lxvx v3, 0, r3 ; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: vperm v2, v3, v2, v4 +; P9BE-NEXT: vperm v2, v4, v2, v3 ; P9BE-NEXT: xxspltw v3, v2, 1 ; P9BE-NEXT: vadduwm v2, v2, v3 ; P9BE-NEXT: vextuwlx r3, r3, v2 diff --git 
a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll --- a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll +++ b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll @@ -35,10 +35,10 @@ ; ; P8BE-LABEL: s2v_test1: ; P8BE: # %bb.0: # %entry -; P8BE-NEXT: lfiwzx f0, 0, r3 -; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 -; P8BE-NEXT: xxsldwi vs0, v2, vs0, 1 -; P8BE-NEXT: xxsldwi v2, vs0, vs0, 3 +; P8BE-NEXT: lxsiwzx v3, 0, r3 +; P8BE-NEXT: vmrghw v4, v2, v3 +; P8BE-NEXT: xxsldwi vs0, v2, v3, 1 +; P8BE-NEXT: xxsldwi v2, v4, vs0, 3 ; P8BE-NEXT: blr entry: %0 = load i32, i32* %int32, align 4 @@ -75,10 +75,10 @@ ; P8BE-LABEL: s2v_test2: ; P8BE: # %bb.0: # %entry ; P8BE-NEXT: addi r3, r3, 4 -; P8BE-NEXT: lfiwzx f0, 0, r3 -; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 -; P8BE-NEXT: xxsldwi vs0, v2, vs0, 1 -; P8BE-NEXT: xxsldwi v2, vs0, vs0, 3 +; P8BE-NEXT: lxsiwzx v3, 0, r3 +; P8BE-NEXT: vmrghw v4, v2, v3 +; P8BE-NEXT: xxsldwi vs0, v2, v3, 1 +; P8BE-NEXT: xxsldwi v2, v4, vs0, 3 ; P8BE-NEXT: blr entry: %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1 @@ -118,10 +118,10 @@ ; P8BE-LABEL: s2v_test3: ; P8BE: # %bb.0: # %entry ; P8BE-NEXT: sldi r4, r7, 2 -; P8BE-NEXT: lfiwzx f0, r3, r4 -; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 -; P8BE-NEXT: xxsldwi vs0, v2, vs0, 1 -; P8BE-NEXT: xxsldwi v2, vs0, vs0, 3 +; P8BE-NEXT: lxsiwzx v3, r3, r4 +; P8BE-NEXT: vmrghw v4, v2, v3 +; P8BE-NEXT: xxsldwi vs0, v2, v3, 1 +; P8BE-NEXT: xxsldwi v2, v4, vs0, 3 ; P8BE-NEXT: blr entry: %idxprom = sext i32 %Idx to i64 @@ -160,10 +160,10 @@ ; P8BE-LABEL: s2v_test4: ; P8BE: # %bb.0: # %entry ; P8BE-NEXT: addi r3, r3, 4 -; P8BE-NEXT: lfiwzx f0, 0, r3 -; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 -; P8BE-NEXT: xxsldwi vs0, v2, vs0, 1 -; P8BE-NEXT: xxsldwi v2, vs0, vs0, 3 +; P8BE-NEXT: lxsiwzx v3, 0, r3 +; P8BE-NEXT: vmrghw v4, v2, v3 +; P8BE-NEXT: xxsldwi vs0, v2, v3, 1 +; P8BE-NEXT: xxsldwi v2, v4, vs0, 3 ; P8BE-NEXT: blr entry: %arrayidx = getelementptr inbounds i32, i32* %int32, i64 
1 @@ -199,10 +199,10 @@ ; ; P8BE-LABEL: s2v_test5: ; P8BE: # %bb.0: # %entry -; P8BE-NEXT: lfiwzx f0, 0, r5 -; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 -; P8BE-NEXT: xxsldwi vs0, v2, vs0, 1 -; P8BE-NEXT: xxsldwi v2, vs0, vs0, 3 +; P8BE-NEXT: lxsiwzx v3, 0, r5 +; P8BE-NEXT: vmrghw v4, v2, v3 +; P8BE-NEXT: xxsldwi vs0, v2, v3, 1 +; P8BE-NEXT: xxsldwi v2, v4, vs0, 3 ; P8BE-NEXT: blr entry: %0 = load i32, i32* %ptr1, align 4 @@ -239,10 +239,10 @@ ; ; P8BE-LABEL: s2v_test_f1: ; P8BE: # %bb.0: # %entry -; P8BE-NEXT: lfiwzx f0, 0, r3 -; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 -; P8BE-NEXT: xxsldwi vs0, v2, vs0, 1 -; P8BE-NEXT: xxsldwi v2, vs0, vs0, 3 +; P8BE-NEXT: lxsiwzx v3, 0, r3 +; P8BE-NEXT: vmrghw v4, v2, v3 +; P8BE-NEXT: xxsldwi vs0, v2, v3, 1 +; P8BE-NEXT: xxsldwi v2, v4, vs0, 3 ; P8BE-NEXT: blr entry: %0 = load float, float* %f64, align 4 @@ -263,10 +263,8 @@ ; P9BE-LABEL: s2v_test_f2: ; P9BE: # %bb.0: # %entry ; P9BE-NEXT: addi r3, r3, 4 -; P9BE-NEXT: xxspltw v2, v2, 1 -; P9BE-NEXT: lfiwzx f0, 0, r3 -; P9BE-NEXT: xxsldwi v3, f0, f0, 1 -; P9BE-NEXT: vmrghw v2, v3, v2 +; P9BE-NEXT: lxsiwzx v3, 0, r3 +; P9BE-NEXT: vmrgow v2, v3, v2 ; P9BE-NEXT: blr ; ; P8LE-LABEL: s2v_test_f2: @@ -280,10 +278,8 @@ ; P8BE-LABEL: s2v_test_f2: ; P8BE: # %bb.0: # %entry ; P8BE-NEXT: addi r3, r3, 4 -; P8BE-NEXT: xxspltw v2, v2, 1 -; P8BE-NEXT: lfiwzx f0, 0, r3 -; P8BE-NEXT: xxsldwi v3, f0, f0, 1 -; P8BE-NEXT: vmrghw v2, v3, v2 +; P8BE-NEXT: lxsiwzx v3, 0, r3 +; P8BE-NEXT: vmrgow v2, v3, v2 ; P8BE-NEXT: blr entry: %arrayidx = getelementptr inbounds float, float* %f64, i64 1 @@ -305,10 +301,8 @@ ; P9BE-LABEL: s2v_test_f3: ; P9BE: # %bb.0: # %entry ; P9BE-NEXT: sldi r4, r7, 2 -; P9BE-NEXT: xxspltw v2, v2, 1 -; P9BE-NEXT: lfiwzx f0, r3, r4 -; P9BE-NEXT: xxsldwi v3, f0, f0, 1 -; P9BE-NEXT: vmrghw v2, v3, v2 +; P9BE-NEXT: lxsiwzx v3, r3, r4 +; P9BE-NEXT: vmrgow v2, v3, v2 ; P9BE-NEXT: blr ; ; P8LE-LABEL: s2v_test_f3: @@ -322,10 +316,8 @@ ; P8BE-LABEL: s2v_test_f3: ; P8BE: # %bb.0: # %entry ; P8BE-NEXT: 
sldi r4, r7, 2 -; P8BE-NEXT: xxspltw v2, v2, 1 -; P8BE-NEXT: lfiwzx f0, r3, r4 -; P8BE-NEXT: xxsldwi v3, f0, f0, 1 -; P8BE-NEXT: vmrghw v2, v3, v2 +; P8BE-NEXT: lxsiwzx v3, r3, r4 +; P8BE-NEXT: vmrgow v2, v3, v2 ; P8BE-NEXT: blr entry: %idxprom = sext i32 %Idx to i64 @@ -348,10 +340,8 @@ ; P9BE-LABEL: s2v_test_f4: ; P9BE: # %bb.0: # %entry ; P9BE-NEXT: addi r3, r3, 4 -; P9BE-NEXT: xxspltw v2, v2, 1 -; P9BE-NEXT: lfiwzx f0, 0, r3 -; P9BE-NEXT: xxsldwi v3, f0, f0, 1 -; P9BE-NEXT: vmrghw v2, v3, v2 +; P9BE-NEXT: lxsiwzx v3, 0, r3 +; P9BE-NEXT: vmrgow v2, v3, v2 ; P9BE-NEXT: blr ; ; P8LE-LABEL: s2v_test_f4: @@ -365,10 +355,8 @@ ; P8BE-LABEL: s2v_test_f4: ; P8BE: # %bb.0: # %entry ; P8BE-NEXT: addi r3, r3, 4 -; P8BE-NEXT: xxspltw v2, v2, 1 -; P8BE-NEXT: lfiwzx f0, 0, r3 -; P8BE-NEXT: xxsldwi v3, f0, f0, 1 -; P8BE-NEXT: vmrghw v2, v3, v2 +; P8BE-NEXT: lxsiwzx v3, 0, r3 +; P8BE-NEXT: vmrgow v2, v3, v2 ; P8BE-NEXT: blr entry: %arrayidx = getelementptr inbounds float, float* %f64, i64 1 @@ -388,10 +376,8 @@ ; ; P9BE-LABEL: s2v_test_f5: ; P9BE: # %bb.0: # %entry -; P9BE-NEXT: lfiwzx f0, 0, r5 -; P9BE-NEXT: xxspltw v2, v2, 1 -; P9BE-NEXT: xxsldwi v3, f0, f0, 1 -; P9BE-NEXT: vmrghw v2, v3, v2 +; P9BE-NEXT: lxsiwzx v3, 0, r5 +; P9BE-NEXT: vmrgow v2, v3, v2 ; P9BE-NEXT: blr ; ; P8LE-LABEL: s2v_test_f5: @@ -403,10 +389,8 @@ ; ; P8BE-LABEL: s2v_test_f5: ; P8BE: # %bb.0: # %entry -; P8BE-NEXT: lfiwzx f0, 0, r5 -; P8BE-NEXT: xxspltw v2, v2, 1 -; P8BE-NEXT: xxsldwi v3, f0, f0, 1 -; P8BE-NEXT: vmrghw v2, v3, v2 +; P8BE-NEXT: lxsiwzx v3, 0, r5 +; P8BE-NEXT: vmrgow v2, v3, v2 ; P8BE-NEXT: blr entry: %0 = load float, float* %ptr1, align 8 diff --git a/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll b/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll --- a/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll +++ b/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll @@ -81,10 +81,9 @@ ; P9BE-NEXT: mulli r4, r4, -124 ; P9BE-NEXT: sub r3, r3, r4 ; P9BE-NEXT: lis r4, -21386 -; P9BE-NEXT: sldi r3, r3, 48 -; 
P9BE-NEXT: ori r4, r4, 37253 -; P9BE-NEXT: mtvsrd v3, r3 +; P9BE-NEXT: mtvsrwz v3, r3 ; P9BE-NEXT: li r3, 0 +; P9BE-NEXT: ori r4, r4, 37253 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 ; P9BE-NEXT: mulhw r4, r3, r4 @@ -95,13 +94,15 @@ ; P9BE-NEXT: mulli r4, r4, 95 ; P9BE-NEXT: sub r3, r3, r4 ; P9BE-NEXT: lis r4, -16728 -; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrwz v4, r3 +; P9BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha ; P9BE-NEXT: ori r4, r4, 63249 -; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: addi r3, r3, .LCPI0_0@toc@l +; P9BE-NEXT: lxvx v5, 0, r3 ; P9BE-NEXT: li r3, 6 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: vmrghh v3, v4, v3 ; P9BE-NEXT: extsh r3, r3 +; P9BE-NEXT: vperm v3, v4, v3, v5 ; P9BE-NEXT: mulhw r4, r3, r4 ; P9BE-NEXT: srwi r5, r4, 31 ; P9BE-NEXT: srawi r4, r4, 8 @@ -109,10 +110,9 @@ ; P9BE-NEXT: mulli r4, r4, -1003 ; P9BE-NEXT: sub r3, r3, r4 ; P9BE-NEXT: lis r4, 21399 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: ori r4, r4, 33437 -; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: mtvsrwz v4, r3 ; P9BE-NEXT: li r3, 4 +; P9BE-NEXT: ori r4, r4, 33437 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 ; P9BE-NEXT: mulhw r4, r3, r4 @@ -121,9 +121,8 @@ ; P9BE-NEXT: add r4, r4, r5 ; P9BE-NEXT: mulli r4, r4, 98 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v2, r3 -; P9BE-NEXT: vmrghh v2, v2, v4 +; P9BE-NEXT: mtvsrwz v2, r3 +; P9BE-NEXT: vperm v2, v2, v4, v5 ; P9BE-NEXT: vmrghw v2, v3, v2 ; P9BE-NEXT: blr ; @@ -224,19 +223,18 @@ ; P8BE-NEXT: mulli r9, r9, -124 ; P8BE-NEXT: mulli r10, r10, 95 ; P8BE-NEXT: sub r3, r5, r3 -; P8BE-NEXT: sldi r3, r3, 48 -; P8BE-NEXT: sub r5, r6, r8 -; P8BE-NEXT: mtvsrd v2, r3 -; P8BE-NEXT: sub r6, r7, r9 -; P8BE-NEXT: sldi r3, r5, 48 +; P8BE-NEXT: addis r5, r2, .LCPI0_0@toc@ha +; P8BE-NEXT: mtvsrwz v2, r3 +; P8BE-NEXT: addi r3, r5, .LCPI0_0@toc@l +; P8BE-NEXT: sub r6, r6, r8 +; P8BE-NEXT: lxvw4x v3, 0, r3 +; P8BE-NEXT: sub r3, r7, r9 +; P8BE-NEXT: mtvsrwz v4, r6 ; P8BE-NEXT: sub r4, 
r4, r10 -; P8BE-NEXT: mtvsrd v3, r3 -; P8BE-NEXT: sldi r3, r6, 48 -; P8BE-NEXT: sldi r4, r4, 48 -; P8BE-NEXT: mtvsrd v4, r3 -; P8BE-NEXT: mtvsrd v5, r4 -; P8BE-NEXT: vmrghh v2, v3, v2 -; P8BE-NEXT: vmrghh v3, v5, v4 +; P8BE-NEXT: mtvsrwz v5, r3 +; P8BE-NEXT: mtvsrwz v0, r4 +; P8BE-NEXT: vperm v2, v4, v2, v3 +; P8BE-NEXT: vperm v3, v0, v5, v3 ; P8BE-NEXT: vmrghw v2, v3, v2 ; P8BE-NEXT: blr %1 = srem <4 x i16> %x, @@ -311,8 +309,7 @@ ; P9BE-NEXT: add r5, r5, r6 ; P9BE-NEXT: mulli r5, r5, 95 ; P9BE-NEXT: sub r3, r3, r5 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v3, r3 +; P9BE-NEXT: mtvsrwz v3, r3 ; P9BE-NEXT: li r3, 4 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 @@ -323,12 +320,14 @@ ; P9BE-NEXT: add r5, r5, r6 ; P9BE-NEXT: mulli r5, r5, 95 ; P9BE-NEXT: sub r3, r3, r5 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: mtvsrwz v4, r3 +; P9BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI1_0@toc@l +; P9BE-NEXT: lxvx v5, 0, r3 ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: vmrghh v3, v4, v3 ; P9BE-NEXT: extsh r3, r3 +; P9BE-NEXT: vperm v3, v4, v3, v5 ; P9BE-NEXT: mulhw r5, r3, r4 ; P9BE-NEXT: add r5, r5, r3 ; P9BE-NEXT: srwi r6, r5, 31 @@ -336,8 +335,7 @@ ; P9BE-NEXT: add r5, r5, r6 ; P9BE-NEXT: mulli r5, r5, 95 ; P9BE-NEXT: sub r3, r3, r5 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: mtvsrwz v4, r3 ; P9BE-NEXT: li r3, 0 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 @@ -348,9 +346,8 @@ ; P9BE-NEXT: add r4, r4, r5 ; P9BE-NEXT: mulli r4, r4, 95 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v2, r3 -; P9BE-NEXT: vmrghh v2, v2, v4 +; P9BE-NEXT: mtvsrwz v2, r3 +; P9BE-NEXT: vperm v2, v2, v4, v5 ; P9BE-NEXT: vmrghw v2, v2, v3 ; P9BE-NEXT: blr ; @@ -441,21 +438,20 @@ ; P8BE-NEXT: srawi r3, r3, 6 ; P8BE-NEXT: mulli r10, r10, 95 ; P8BE-NEXT: sub r5, r5, r8 +; P8BE-NEXT: addis r8, r2, .LCPI1_0@toc@ha ; P8BE-NEXT: add r3, r3, 
r11 -; P8BE-NEXT: sldi r5, r5, 48 +; P8BE-NEXT: mtvsrwz v2, r5 +; P8BE-NEXT: addi r5, r8, .LCPI1_0@toc@l ; P8BE-NEXT: mulli r3, r3, 95 ; P8BE-NEXT: sub r6, r6, r9 -; P8BE-NEXT: mtvsrd v2, r5 -; P8BE-NEXT: sldi r6, r6, 48 -; P8BE-NEXT: sub r7, r7, r10 -; P8BE-NEXT: mtvsrd v3, r6 +; P8BE-NEXT: lxvw4x v3, 0, r5 +; P8BE-NEXT: mtvsrwz v4, r6 +; P8BE-NEXT: sub r5, r7, r10 +; P8BE-NEXT: mtvsrwz v5, r5 ; P8BE-NEXT: sub r3, r4, r3 -; P8BE-NEXT: sldi r4, r7, 48 -; P8BE-NEXT: vmrghh v2, v3, v2 -; P8BE-NEXT: sldi r3, r3, 48 -; P8BE-NEXT: mtvsrd v4, r4 -; P8BE-NEXT: mtvsrd v5, r3 -; P8BE-NEXT: vmrghh v3, v5, v4 +; P8BE-NEXT: vperm v2, v4, v2, v3 +; P8BE-NEXT: mtvsrwz v0, r3 +; P8BE-NEXT: vperm v3, v0, v5, v3 ; P8BE-NEXT: vmrghw v2, v3, v2 ; P8BE-NEXT: blr %1 = srem <4 x i16> %x, @@ -540,8 +536,7 @@ ; P9BE-NEXT: add r4, r4, r6 ; P9BE-NEXT: mulli r6, r4, 95 ; P9BE-NEXT: sub r3, r3, r6 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v3, r3 +; P9BE-NEXT: mtvsrwz v3, r3 ; P9BE-NEXT: li r3, 4 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r6, r3 @@ -552,12 +547,14 @@ ; P9BE-NEXT: add r6, r6, r7 ; P9BE-NEXT: mulli r7, r6, 95 ; P9BE-NEXT: sub r3, r3, r7 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: mtvsrwz v4, r3 +; P9BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI2_0@toc@l +; P9BE-NEXT: lxvx v5, 0, r3 ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: vmrghh v3, v4, v3 ; P9BE-NEXT: extsh r7, r3 +; P9BE-NEXT: vperm v3, v4, v3, v5 ; P9BE-NEXT: mulhw r8, r7, r5 ; P9BE-NEXT: add r7, r8, r7 ; P9BE-NEXT: srwi r8, r7, 31 @@ -565,8 +562,7 @@ ; P9BE-NEXT: add r7, r7, r8 ; P9BE-NEXT: mulli r8, r7, 95 ; P9BE-NEXT: sub r3, r3, r8 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: mtvsrwz v4, r3 ; P9BE-NEXT: li r3, 0 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 @@ -576,21 +572,16 @@ ; P9BE-NEXT: srawi r5, r5, 6 ; P9BE-NEXT: add r5, r5, r8 ; P9BE-NEXT: mulli r8, r5, 95 +; P9BE-NEXT: 
mtvsrwz v0, r5 ; P9BE-NEXT: sub r3, r3, r8 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v2, r3 -; P9BE-NEXT: sldi r3, r4, 48 -; P9BE-NEXT: vmrghh v2, v2, v4 +; P9BE-NEXT: mtvsrwz v2, r3 +; P9BE-NEXT: vperm v2, v2, v4, v5 +; P9BE-NEXT: mtvsrwz v4, r6 ; P9BE-NEXT: vmrghw v2, v2, v3 -; P9BE-NEXT: mtvsrd v3, r3 -; P9BE-NEXT: sldi r3, r6, 48 -; P9BE-NEXT: mtvsrd v4, r3 -; P9BE-NEXT: sldi r3, r7, 48 -; P9BE-NEXT: vmrghh v3, v4, v3 -; P9BE-NEXT: mtvsrd v4, r3 -; P9BE-NEXT: sldi r3, r5, 48 -; P9BE-NEXT: mtvsrd v5, r3 -; P9BE-NEXT: vmrghh v4, v5, v4 +; P9BE-NEXT: mtvsrwz v3, r4 +; P9BE-NEXT: vperm v3, v4, v3, v5 +; P9BE-NEXT: mtvsrwz v4, r7 +; P9BE-NEXT: vperm v4, v0, v4, v5 ; P9BE-NEXT: vmrghw v3, v4, v3 ; P9BE-NEXT: vadduhm v2, v2, v3 ; P9BE-NEXT: blr @@ -656,68 +647,65 @@ ; ; P8BE-LABEL: combine_srem_sdiv: ; P8BE: # %bb.0: -; P8BE-NEXT: mfvsrd r5, v2 -; P8BE-NEXT: lis r4, -21386 -; P8BE-NEXT: ori r4, r4, 37253 -; P8BE-NEXT: clrldi r3, r5, 48 -; P8BE-NEXT: rldicl r6, r5, 48, 48 -; P8BE-NEXT: extsh r8, r3 -; P8BE-NEXT: rldicl r7, r5, 32, 48 +; P8BE-NEXT: mfvsrd r4, v2 +; P8BE-NEXT: lis r3, -21386 +; P8BE-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; P8BE-NEXT: addis r30, r2, .LCPI2_0@toc@ha +; P8BE-NEXT: ori r3, r3, 37253 +; P8BE-NEXT: clrldi r5, r4, 48 +; P8BE-NEXT: rldicl r6, r4, 48, 48 +; P8BE-NEXT: rldicl r7, r4, 32, 48 +; P8BE-NEXT: extsh r8, r5 ; P8BE-NEXT: extsh r9, r6 -; P8BE-NEXT: rldicl r5, r5, 16, 48 -; P8BE-NEXT: mulhw r11, r8, r4 ; P8BE-NEXT: extsh r10, r7 -; P8BE-NEXT: extsh r5, r5 -; P8BE-NEXT: mulhw r12, r9, r4 -; P8BE-NEXT: mulhw r0, r10, r4 -; P8BE-NEXT: mulhw r4, r5, r4 +; P8BE-NEXT: mulhw r11, r8, r3 +; P8BE-NEXT: mulhw r12, r9, r3 +; P8BE-NEXT: rldicl r4, r4, 16, 48 +; P8BE-NEXT: mulhw r0, r10, r3 +; P8BE-NEXT: extsh r4, r4 +; P8BE-NEXT: mulhw r3, r4, r3 ; P8BE-NEXT: add r8, r11, r8 ; P8BE-NEXT: add r9, r12, r9 -; P8BE-NEXT: srawi r11, r8, 6 -; P8BE-NEXT: srwi r8, r8, 31 +; P8BE-NEXT: srwi r11, r8, 31 ; P8BE-NEXT: add r10, r0, r10 -; P8BE-NEXT: 
add r4, r4, r5 -; P8BE-NEXT: add r8, r11, r8 +; P8BE-NEXT: srawi r8, r8, 6 +; P8BE-NEXT: addi r0, r30, .LCPI2_0@toc@l +; P8BE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; P8BE-NEXT: srawi r12, r9, 6 ; P8BE-NEXT: srwi r9, r9, 31 -; P8BE-NEXT: srawi r0, r10, 6 -; P8BE-NEXT: srawi r11, r4, 6 +; P8BE-NEXT: add r8, r8, r11 +; P8BE-NEXT: add r3, r3, r4 +; P8BE-NEXT: lxvw4x v2, 0, r0 +; P8BE-NEXT: srawi r11, r10, 6 ; P8BE-NEXT: srwi r10, r10, 31 ; P8BE-NEXT: add r9, r12, r9 -; P8BE-NEXT: srwi r4, r4, 31 +; P8BE-NEXT: mtvsrwz v3, r8 ; P8BE-NEXT: mulli r12, r8, 95 -; P8BE-NEXT: add r10, r0, r10 -; P8BE-NEXT: add r4, r11, r4 -; P8BE-NEXT: mulli r0, r9, 95 -; P8BE-NEXT: sldi r9, r9, 48 -; P8BE-NEXT: sldi r8, r8, 48 -; P8BE-NEXT: mtvsrd v3, r9 -; P8BE-NEXT: mulli r9, r4, 95 -; P8BE-NEXT: mtvsrd v2, r8 -; P8BE-NEXT: mulli r8, r10, 95 -; P8BE-NEXT: sldi r10, r10, 48 -; P8BE-NEXT: sub r3, r3, r12 -; P8BE-NEXT: mtvsrd v4, r10 -; P8BE-NEXT: sub r6, r6, r0 -; P8BE-NEXT: sldi r3, r3, 48 -; P8BE-NEXT: vmrghh v2, v3, v2 -; P8BE-NEXT: sldi r6, r6, 48 -; P8BE-NEXT: mtvsrd v3, r3 -; P8BE-NEXT: sub r3, r5, r9 -; P8BE-NEXT: sub r7, r7, r8 -; P8BE-NEXT: mtvsrd v5, r6 -; P8BE-NEXT: sldi r3, r3, 48 -; P8BE-NEXT: sldi r5, r7, 48 -; P8BE-NEXT: mtvsrd v1, r3 -; P8BE-NEXT: sldi r3, r4, 48 -; P8BE-NEXT: mtvsrd v0, r5 -; P8BE-NEXT: vmrghh v3, v5, v3 -; P8BE-NEXT: mtvsrd v5, r3 -; P8BE-NEXT: vmrghh v0, v1, v0 -; P8BE-NEXT: vmrghh v4, v5, v4 -; P8BE-NEXT: vmrghw v3, v0, v3 -; P8BE-NEXT: vmrghw v2, v4, v2 -; P8BE-NEXT: vadduhm v2, v3, v2 +; P8BE-NEXT: add r10, r11, r10 +; P8BE-NEXT: srwi r11, r3, 31 +; P8BE-NEXT: mtvsrwz v4, r9 +; P8BE-NEXT: srawi r3, r3, 6 +; P8BE-NEXT: mulli r8, r9, 95 +; P8BE-NEXT: mtvsrwz v5, r10 +; P8BE-NEXT: add r3, r3, r11 +; P8BE-NEXT: mulli r9, r10, 95 +; P8BE-NEXT: vperm v3, v4, v3, v2 +; P8BE-NEXT: mulli r10, r3, 95 +; P8BE-NEXT: sub r5, r5, r12 +; P8BE-NEXT: sub r6, r6, r8 +; P8BE-NEXT: mtvsrwz v4, r5 +; P8BE-NEXT: mtvsrwz v0, r6 +; P8BE-NEXT: sub r5, r7, r9 +; P8BE-NEXT: 
sub r4, r4, r10 +; P8BE-NEXT: mtvsrwz v1, r5 +; P8BE-NEXT: mtvsrwz v6, r4 +; P8BE-NEXT: vperm v4, v0, v4, v2 +; P8BE-NEXT: mtvsrwz v0, r3 +; P8BE-NEXT: vperm v1, v6, v1, v2 +; P8BE-NEXT: vperm v2, v0, v5, v2 +; P8BE-NEXT: vmrghw v4, v1, v4 +; P8BE-NEXT: vmrghw v2, v2, v3 +; P8BE-NEXT: vadduhm v2, v4, v2 ; P8BE-NEXT: blr %1 = srem <4 x i16> %x, %2 = sdiv <4 x i16> %x, @@ -780,8 +768,7 @@ ; P9BE-NEXT: addze r4, r4 ; P9BE-NEXT: slwi r4, r4, 5 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v3, r3 +; P9BE-NEXT: mtvsrwz v3, r3 ; P9BE-NEXT: li r3, 0 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 @@ -790,13 +777,15 @@ ; P9BE-NEXT: slwi r4, r4, 6 ; P9BE-NEXT: sub r3, r3, r4 ; P9BE-NEXT: lis r4, -21386 -; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrwz v4, r3 +; P9BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha ; P9BE-NEXT: ori r4, r4, 37253 -; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: addi r3, r3, .LCPI3_0@toc@l +; P9BE-NEXT: lxvx v5, 0, r3 ; P9BE-NEXT: li r3, 6 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: vmrghh v3, v4, v3 ; P9BE-NEXT: extsh r3, r3 +; P9BE-NEXT: vperm v3, v4, v3, v5 ; P9BE-NEXT: mulhw r4, r3, r4 ; P9BE-NEXT: add r4, r4, r3 ; P9BE-NEXT: srwi r5, r4, 31 @@ -804,8 +793,7 @@ ; P9BE-NEXT: add r4, r4, r5 ; P9BE-NEXT: mulli r4, r4, 95 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: mtvsrwz v4, r3 ; P9BE-NEXT: li r3, 4 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 @@ -813,9 +801,8 @@ ; P9BE-NEXT: addze r4, r4 ; P9BE-NEXT: slwi r4, r4, 3 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v2, r3 -; P9BE-NEXT: vmrghh v2, v2, v4 +; P9BE-NEXT: mtvsrwz v2, r3 +; P9BE-NEXT: vperm v2, v2, v4, v5 ; P9BE-NEXT: vmrghw v2, v3, v2 ; P9BE-NEXT: blr ; @@ -885,24 +872,23 @@ ; P8BE-NEXT: srwi r10, r3, 31 ; P8BE-NEXT: srawi r3, r3, 6 ; P8BE-NEXT: slwi r8, r9, 6 +; P8BE-NEXT: mtvsrwz v2, r6 ; P8BE-NEXT: add r3, r3, r10 ; P8BE-NEXT: srawi r9, r4, 3 -; 
P8BE-NEXT: sub r7, r7, r8 +; P8BE-NEXT: addis r10, r2, .LCPI3_0@toc@ha +; P8BE-NEXT: sub r6, r7, r8 ; P8BE-NEXT: mulli r3, r3, 95 -; P8BE-NEXT: sldi r6, r6, 48 ; P8BE-NEXT: addze r8, r9 -; P8BE-NEXT: mtvsrd v2, r6 -; P8BE-NEXT: slwi r6, r8, 3 -; P8BE-NEXT: sub r4, r4, r6 -; P8BE-NEXT: sldi r4, r4, 48 +; P8BE-NEXT: addi r7, r10, .LCPI3_0@toc@l +; P8BE-NEXT: mtvsrwz v4, r6 +; P8BE-NEXT: lxvw4x v3, 0, r7 ; P8BE-NEXT: sub r3, r5, r3 -; P8BE-NEXT: sldi r5, r7, 48 -; P8BE-NEXT: mtvsrd v5, r4 -; P8BE-NEXT: sldi r3, r3, 48 -; P8BE-NEXT: mtvsrd v3, r5 -; P8BE-NEXT: mtvsrd v4, r3 -; P8BE-NEXT: vmrghh v2, v3, v2 -; P8BE-NEXT: vmrghh v3, v5, v4 +; P8BE-NEXT: slwi r5, r8, 3 +; P8BE-NEXT: vperm v2, v4, v2, v3 +; P8BE-NEXT: sub r4, r4, r5 +; P8BE-NEXT: mtvsrwz v5, r3 +; P8BE-NEXT: mtvsrwz v0, r4 +; P8BE-NEXT: vperm v3, v0, v5, v3 ; P8BE-NEXT: vmrghw v2, v2, v3 ; P8BE-NEXT: blr %1 = srem <4 x i16> %x, @@ -973,10 +959,9 @@ ; P9BE-NEXT: mulli r4, r4, 23 ; P9BE-NEXT: sub r3, r3, r4 ; P9BE-NEXT: lis r4, 24749 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: ori r4, r4, 47143 -; P9BE-NEXT: mtvsrd v3, r3 +; P9BE-NEXT: mtvsrwz v3, r3 ; P9BE-NEXT: li r3, 6 +; P9BE-NEXT: ori r4, r4, 47143 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 ; P9BE-NEXT: mulhw r4, r3, r4 @@ -986,13 +971,15 @@ ; P9BE-NEXT: mulli r4, r4, 5423 ; P9BE-NEXT: sub r3, r3, r4 ; P9BE-NEXT: lis r4, -14230 -; P9BE-NEXT: sldi r3, r3, 48 +; P9BE-NEXT: mtvsrwz v4, r3 +; P9BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha ; P9BE-NEXT: ori r4, r4, 30865 -; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: addi r3, r3, .LCPI4_0@toc@l +; P9BE-NEXT: lxvx v5, 0, r3 ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: vmrghh v3, v3, v4 ; P9BE-NEXT: extsh r3, r3 +; P9BE-NEXT: vperm v3, v3, v4, v5 ; P9BE-NEXT: mulhw r4, r3, r4 ; P9BE-NEXT: add r4, r4, r3 ; P9BE-NEXT: srwi r5, r4, 31 @@ -1000,12 +987,10 @@ ; P9BE-NEXT: add r4, r4, r5 ; P9BE-NEXT: mulli r4, r4, 654 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: 
mtvsrd v2, r3 +; P9BE-NEXT: mtvsrwz v2, r3 ; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v4, r3 -; P9BE-NEXT: vmrghh v2, v4, v2 +; P9BE-NEXT: mtvsrwz v4, r3 +; P9BE-NEXT: vperm v2, v4, v2, v5 ; P9BE-NEXT: vmrghw v2, v2, v3 ; P9BE-NEXT: blr ; @@ -1057,51 +1042,50 @@ ; ; P8BE-LABEL: dont_fold_srem_one: ; P8BE: # %bb.0: -; P8BE-NEXT: mfvsrd r3, v2 -; P8BE-NEXT: lis r5, 24749 -; P8BE-NEXT: lis r6, -19946 +; P8BE-NEXT: mfvsrd r4, v2 +; P8BE-NEXT: lis r3, 24749 +; P8BE-NEXT: lis r7, -19946 ; P8BE-NEXT: lis r8, -14230 -; P8BE-NEXT: ori r5, r5, 47143 -; P8BE-NEXT: ori r6, r6, 17097 +; P8BE-NEXT: ori r3, r3, 47143 +; P8BE-NEXT: ori r7, r7, 17097 ; P8BE-NEXT: ori r8, r8, 30865 -; P8BE-NEXT: clrldi r4, r3, 48 -; P8BE-NEXT: rldicl r7, r3, 48, 48 -; P8BE-NEXT: rldicl r3, r3, 32, 48 +; P8BE-NEXT: clrldi r5, r4, 48 +; P8BE-NEXT: rldicl r6, r4, 48, 48 +; P8BE-NEXT: rldicl r4, r4, 32, 48 +; P8BE-NEXT: extsh r5, r5 +; P8BE-NEXT: extsh r6, r6 ; P8BE-NEXT: extsh r4, r4 -; P8BE-NEXT: extsh r7, r7 -; P8BE-NEXT: extsh r3, r3 -; P8BE-NEXT: mulhw r5, r4, r5 -; P8BE-NEXT: mulhw r6, r7, r6 -; P8BE-NEXT: mulhw r8, r3, r8 -; P8BE-NEXT: srwi r9, r5, 31 -; P8BE-NEXT: srawi r5, r5, 11 -; P8BE-NEXT: add r6, r6, r7 -; P8BE-NEXT: add r8, r8, r3 -; P8BE-NEXT: add r5, r5, r9 -; P8BE-NEXT: srwi r9, r6, 31 -; P8BE-NEXT: srawi r6, r6, 4 -; P8BE-NEXT: add r6, r6, r9 -; P8BE-NEXT: srwi r9, r8, 31 -; P8BE-NEXT: srawi r8, r8, 9 -; P8BE-NEXT: mulli r5, r5, 5423 -; P8BE-NEXT: add r8, r8, r9 -; P8BE-NEXT: mulli r6, r6, 23 -; P8BE-NEXT: li r9, 0 +; P8BE-NEXT: mulhw r3, r5, r3 +; P8BE-NEXT: mulhw r7, r6, r7 +; P8BE-NEXT: mulhw r8, r4, r8 +; P8BE-NEXT: srawi r9, r3, 11 +; P8BE-NEXT: srwi r3, r3, 31 +; P8BE-NEXT: add r7, r7, r6 +; P8BE-NEXT: add r8, r8, r4 +; P8BE-NEXT: add r3, r9, r3 +; P8BE-NEXT: srwi r9, r7, 31 +; P8BE-NEXT: srawi r7, r7, 4 +; P8BE-NEXT: srawi r10, r8, 9 +; P8BE-NEXT: srwi r8, r8, 31 +; P8BE-NEXT: add r7, r7, r9 +; P8BE-NEXT: addis r9, r2, .LCPI4_0@toc@ha +; P8BE-NEXT: 
mulli r3, r3, 5423 +; P8BE-NEXT: add r8, r10, r8 +; P8BE-NEXT: li r10, 0 +; P8BE-NEXT: mulli r7, r7, 23 ; P8BE-NEXT: mulli r8, r8, 654 -; P8BE-NEXT: sub r4, r4, r5 -; P8BE-NEXT: sldi r5, r9, 48 -; P8BE-NEXT: mtvsrd v2, r5 -; P8BE-NEXT: sub r5, r7, r6 -; P8BE-NEXT: sldi r4, r4, 48 -; P8BE-NEXT: sub r3, r3, r8 -; P8BE-NEXT: mtvsrd v3, r4 -; P8BE-NEXT: sldi r4, r5, 48 -; P8BE-NEXT: sldi r3, r3, 48 -; P8BE-NEXT: mtvsrd v4, r4 -; P8BE-NEXT: mtvsrd v5, r3 -; P8BE-NEXT: vmrghh v3, v4, v3 -; P8BE-NEXT: vmrghh v2, v2, v5 -; P8BE-NEXT: vmrghw v2, v2, v3 +; P8BE-NEXT: mtvsrwz v2, r10 +; P8BE-NEXT: sub r3, r5, r3 +; P8BE-NEXT: addi r5, r9, .LCPI4_0@toc@l +; P8BE-NEXT: lxvw4x v3, 0, r5 +; P8BE-NEXT: sub r5, r6, r7 +; P8BE-NEXT: mtvsrwz v4, r3 +; P8BE-NEXT: sub r3, r4, r8 +; P8BE-NEXT: mtvsrwz v5, r5 +; P8BE-NEXT: mtvsrwz v0, r3 +; P8BE-NEXT: vperm v4, v5, v4, v3 +; P8BE-NEXT: vperm v2, v2, v0, v3 +; P8BE-NEXT: vmrghw v2, v2, v4 ; P8BE-NEXT: blr %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -1166,10 +1150,9 @@ ; P9BE-NEXT: mulli r4, r4, 23 ; P9BE-NEXT: sub r3, r3, r4 ; P9BE-NEXT: lis r4, 24749 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: ori r4, r4, 47143 -; P9BE-NEXT: mtvsrd v3, r3 +; P9BE-NEXT: mtvsrwz v3, r3 ; P9BE-NEXT: li r3, 6 +; P9BE-NEXT: ori r4, r4, 47143 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 ; P9BE-NEXT: mulhw r4, r3, r4 @@ -1178,22 +1161,22 @@ ; P9BE-NEXT: add r4, r4, r5 ; P9BE-NEXT: mulli r4, r4, 5423 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: mtvsrwz v4, r3 +; P9BE-NEXT: addis r3, r2, .LCPI5_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI5_0@toc@l +; P9BE-NEXT: lxvx v5, 0, r3 ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: vmrghh v3, v3, v4 ; P9BE-NEXT: extsh r3, r3 +; P9BE-NEXT: vperm v3, v3, v4, v5 ; P9BE-NEXT: srawi r4, r3, 15 ; P9BE-NEXT: addze r4, r4 ; P9BE-NEXT: slwi r4, r4, 15 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v2, r3 +; 
P9BE-NEXT: mtvsrwz v2, r3 ; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v4, r3 -; P9BE-NEXT: vmrghh v2, v4, v2 +; P9BE-NEXT: mtvsrwz v4, r3 +; P9BE-NEXT: vperm v2, v4, v2, v5 ; P9BE-NEXT: vmrghw v2, v2, v3 ; P9BE-NEXT: blr ; @@ -1243,8 +1226,10 @@ ; P8BE-NEXT: mfvsrd r3, v2 ; P8BE-NEXT: lis r4, 24749 ; P8BE-NEXT: lis r5, -19946 +; P8BE-NEXT: li r9, 0 ; P8BE-NEXT: ori r4, r4, 47143 ; P8BE-NEXT: ori r5, r5, 17097 +; P8BE-NEXT: mtvsrwz v2, r9 ; P8BE-NEXT: clrldi r6, r3, 48 ; P8BE-NEXT: rldicl r7, r3, 48, 48 ; P8BE-NEXT: extsh r6, r6 @@ -1261,25 +1246,22 @@ ; P8BE-NEXT: srawi r5, r5, 4 ; P8BE-NEXT: mulli r4, r4, 5423 ; P8BE-NEXT: add r5, r5, r8 -; P8BE-NEXT: li r8, 0 +; P8BE-NEXT: addis r8, r2, .LCPI5_0@toc@ha +; P8BE-NEXT: srawi r10, r3, 15 ; P8BE-NEXT: mulli r5, r5, 23 -; P8BE-NEXT: srawi r9, r3, 15 ; P8BE-NEXT: sub r4, r6, r4 -; P8BE-NEXT: sldi r6, r8, 48 -; P8BE-NEXT: addze r8, r9 -; P8BE-NEXT: mtvsrd v2, r6 +; P8BE-NEXT: addi r6, r8, .LCPI5_0@toc@l +; P8BE-NEXT: addze r8, r10 +; P8BE-NEXT: lxvw4x v3, 0, r6 ; P8BE-NEXT: slwi r6, r8, 15 -; P8BE-NEXT: sldi r4, r4, 48 +; P8BE-NEXT: mtvsrwz v4, r4 ; P8BE-NEXT: sub r5, r7, r5 ; P8BE-NEXT: sub r3, r3, r6 -; P8BE-NEXT: mtvsrd v3, r4 -; P8BE-NEXT: sldi r4, r5, 48 -; P8BE-NEXT: sldi r3, r3, 48 -; P8BE-NEXT: mtvsrd v4, r4 -; P8BE-NEXT: mtvsrd v5, r3 -; P8BE-NEXT: vmrghh v3, v4, v3 -; P8BE-NEXT: vmrghh v2, v2, v5 -; P8BE-NEXT: vmrghw v2, v2, v3 +; P8BE-NEXT: mtvsrwz v5, r5 +; P8BE-NEXT: mtvsrwz v0, r3 +; P8BE-NEXT: vperm v4, v5, v4, v3 +; P8BE-NEXT: vperm v2, v2, v0, v3 +; P8BE-NEXT: vmrghw v2, v2, v4 ; P8BE-NEXT: blr %1 = srem <4 x i16> %x, ret <4 x i16> %1 diff --git a/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll b/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll --- a/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll +++ b/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll @@ -74,32 +74,32 @@ ; P9BE-NEXT: mulli r4, r4, 1003 ; P9BE-NEXT: sub r3, r3, r4 ; P9BE-NEXT: lis r4, 21399 -; P9BE-NEXT: sldi r3, r3, 48 -; 
P9BE-NEXT: ori r4, r4, 33437 -; P9BE-NEXT: mtvsrd v3, r3 +; P9BE-NEXT: mtvsrwz v3, r3 ; P9BE-NEXT: li r3, 4 +; P9BE-NEXT: ori r4, r4, 33437 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: clrlwi r3, r3, 16 ; P9BE-NEXT: mulhwu r4, r3, r4 ; P9BE-NEXT: srwi r4, r4, 5 ; P9BE-NEXT: mulli r4, r4, 98 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: mtvsrwz v4, r3 +; P9BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI0_0@toc@l +; P9BE-NEXT: lxvx v5, 0, r3 ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: vmrghh v3, v4, v3 ; P9BE-NEXT: clrlwi r4, r3, 16 ; P9BE-NEXT: rlwinm r3, r3, 30, 18, 31 +; P9BE-NEXT: vperm v3, v4, v3, v5 ; P9BE-NEXT: mulhwu r3, r3, r5 ; P9BE-NEXT: srwi r3, r3, 2 ; P9BE-NEXT: mulli r3, r3, 124 ; P9BE-NEXT: sub r3, r4, r3 ; P9BE-NEXT: lis r4, 22765 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: ori r4, r4, 8969 -; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: mtvsrwz v4, r3 ; P9BE-NEXT: li r3, 0 +; P9BE-NEXT: ori r4, r4, 8969 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: clrlwi r3, r3, 16 ; P9BE-NEXT: mulhwu r4, r3, r4 @@ -109,9 +109,8 @@ ; P9BE-NEXT: srwi r4, r4, 6 ; P9BE-NEXT: mulli r4, r4, 95 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v2, r3 -; P9BE-NEXT: vmrghh v2, v2, v4 +; P9BE-NEXT: mtvsrwz v2, r3 +; P9BE-NEXT: vperm v2, v2, v4, v5 ; P9BE-NEXT: vmrghw v2, v2, v3 ; P9BE-NEXT: blr ; @@ -178,41 +177,40 @@ ; P8BE-NEXT: rldicl r6, r4, 16, 48 ; P8BE-NEXT: clrldi r5, r4, 48 ; P8BE-NEXT: clrlwi r6, r6, 16 -; P8BE-NEXT: rldicl r8, r4, 48, 48 ; P8BE-NEXT: clrlwi r5, r5, 16 ; P8BE-NEXT: mulhwu r3, r6, r3 +; P8BE-NEXT: rldicl r8, r4, 48, 48 +; P8BE-NEXT: mulhwu r7, r5, r7 ; P8BE-NEXT: rldicl r4, r4, 32, 48 ; P8BE-NEXT: clrlwi r8, r8, 16 -; P8BE-NEXT: mulhwu r7, r5, r7 ; P8BE-NEXT: rlwinm r11, r4, 30, 18, 31 -; P8BE-NEXT: clrlwi r4, r4, 16 ; P8BE-NEXT: mulhwu r9, r8, r9 +; P8BE-NEXT: clrlwi r4, r4, 16 ; P8BE-NEXT: mulhwu r10, r11, r10 ; 
P8BE-NEXT: sub r11, r6, r3 -; P8BE-NEXT: srwi r11, r11, 1 ; P8BE-NEXT: srwi r7, r7, 8 +; P8BE-NEXT: srwi r11, r11, 1 ; P8BE-NEXT: add r3, r11, r3 -; P8BE-NEXT: srwi r9, r9, 5 -; P8BE-NEXT: srwi r10, r10, 2 ; P8BE-NEXT: mulli r7, r7, 1003 +; P8BE-NEXT: srwi r9, r9, 5 ; P8BE-NEXT: srwi r3, r3, 6 +; P8BE-NEXT: srwi r10, r10, 2 ; P8BE-NEXT: mulli r9, r9, 98 ; P8BE-NEXT: mulli r3, r3, 95 ; P8BE-NEXT: mulli r10, r10, 124 ; P8BE-NEXT: sub r5, r5, r7 -; P8BE-NEXT: sub r7, r8, r9 -; P8BE-NEXT: sldi r5, r5, 48 +; P8BE-NEXT: addis r7, r2, .LCPI0_0@toc@ha +; P8BE-NEXT: mtvsrwz v2, r5 +; P8BE-NEXT: addi r5, r7, .LCPI0_0@toc@l +; P8BE-NEXT: sub r8, r8, r9 +; P8BE-NEXT: lxvw4x v3, 0, r5 ; P8BE-NEXT: sub r3, r6, r3 ; P8BE-NEXT: sub r4, r4, r10 -; P8BE-NEXT: mtvsrd v2, r5 -; P8BE-NEXT: sldi r5, r7, 48 -; P8BE-NEXT: sldi r3, r3, 48 -; P8BE-NEXT: sldi r4, r4, 48 -; P8BE-NEXT: mtvsrd v3, r5 -; P8BE-NEXT: mtvsrd v4, r3 -; P8BE-NEXT: mtvsrd v5, r4 -; P8BE-NEXT: vmrghh v2, v3, v2 -; P8BE-NEXT: vmrghh v3, v4, v5 +; P8BE-NEXT: mtvsrwz v4, r8 +; P8BE-NEXT: mtvsrwz v5, r3 +; P8BE-NEXT: mtvsrwz v0, r4 +; P8BE-NEXT: vperm v2, v4, v2, v3 +; P8BE-NEXT: vperm v3, v5, v0, v3 ; P8BE-NEXT: vmrghw v2, v3, v2 ; P8BE-NEXT: blr %1 = urem <4 x i16> %x, @@ -287,8 +285,7 @@ ; P9BE-NEXT: srwi r5, r5, 6 ; P9BE-NEXT: mulli r5, r5, 95 ; P9BE-NEXT: sub r3, r3, r5 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v3, r3 +; P9BE-NEXT: mtvsrwz v3, r3 ; P9BE-NEXT: li r3, 4 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: clrlwi r3, r3, 16 @@ -299,12 +296,14 @@ ; P9BE-NEXT: srwi r5, r5, 6 ; P9BE-NEXT: mulli r5, r5, 95 ; P9BE-NEXT: sub r3, r3, r5 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: mtvsrwz v4, r3 +; P9BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI1_0@toc@l +; P9BE-NEXT: lxvx v5, 0, r3 ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: vmrghh v3, v4, v3 ; P9BE-NEXT: clrlwi r3, r3, 16 +; P9BE-NEXT: vperm v3, v4, v3, v5 ; P9BE-NEXT: mulhwu r5, 
r3, r4 ; P9BE-NEXT: sub r6, r3, r5 ; P9BE-NEXT: srwi r6, r6, 1 @@ -312,8 +311,7 @@ ; P9BE-NEXT: srwi r5, r5, 6 ; P9BE-NEXT: mulli r5, r5, 95 ; P9BE-NEXT: sub r3, r3, r5 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: mtvsrwz v4, r3 ; P9BE-NEXT: li r3, 0 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: clrlwi r3, r3, 16 @@ -324,9 +322,8 @@ ; P9BE-NEXT: srwi r4, r4, 6 ; P9BE-NEXT: mulli r4, r4, 95 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v2, r3 -; P9BE-NEXT: vmrghh v2, v2, v4 +; P9BE-NEXT: mtvsrwz v2, r3 +; P9BE-NEXT: vperm v2, v2, v4, v5 ; P9BE-NEXT: vmrghw v2, v2, v3 ; P9BE-NEXT: blr ; @@ -411,27 +408,26 @@ ; P8BE-NEXT: add r10, r11, r10 ; P8BE-NEXT: srwi r11, r12, 1 ; P8BE-NEXT: srwi r9, r9, 6 +; P8BE-NEXT: mulli r8, r8, 95 ; P8BE-NEXT: add r3, r11, r3 ; P8BE-NEXT: srwi r10, r10, 6 ; P8BE-NEXT: srwi r3, r3, 6 -; P8BE-NEXT: mulli r8, r8, 95 ; P8BE-NEXT: mulli r9, r9, 95 ; P8BE-NEXT: mulli r10, r10, 95 ; P8BE-NEXT: mulli r3, r3, 95 ; P8BE-NEXT: sub r5, r5, r8 +; P8BE-NEXT: addis r8, r2, .LCPI1_0@toc@ha +; P8BE-NEXT: mtvsrwz v2, r5 +; P8BE-NEXT: addi r5, r8, .LCPI1_0@toc@l ; P8BE-NEXT: sub r6, r6, r9 -; P8BE-NEXT: sub r7, r7, r10 +; P8BE-NEXT: lxvw4x v3, 0, r5 +; P8BE-NEXT: sub r5, r7, r10 ; P8BE-NEXT: sub r3, r4, r3 -; P8BE-NEXT: sldi r5, r5, 48 -; P8BE-NEXT: sldi r6, r6, 48 -; P8BE-NEXT: sldi r4, r7, 48 -; P8BE-NEXT: mtvsrd v2, r5 -; P8BE-NEXT: sldi r3, r3, 48 -; P8BE-NEXT: mtvsrd v3, r6 -; P8BE-NEXT: mtvsrd v4, r4 -; P8BE-NEXT: mtvsrd v5, r3 -; P8BE-NEXT: vmrghh v2, v3, v2 -; P8BE-NEXT: vmrghh v3, v5, v4 +; P8BE-NEXT: mtvsrwz v4, r6 +; P8BE-NEXT: mtvsrwz v5, r5 +; P8BE-NEXT: mtvsrwz v0, r3 +; P8BE-NEXT: vperm v2, v4, v2, v3 +; P8BE-NEXT: vperm v3, v0, v5, v3 ; P8BE-NEXT: vmrghw v2, v3, v2 ; P8BE-NEXT: blr %1 = urem <4 x i16> %x, @@ -516,8 +512,7 @@ ; P9BE-NEXT: srwi r4, r4, 6 ; P9BE-NEXT: mulli r6, r4, 95 ; P9BE-NEXT: sub r3, r3, r6 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v3, r3 +; P9BE-NEXT: 
mtvsrwz v3, r3 ; P9BE-NEXT: li r3, 4 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: clrlwi r6, r3, 16 @@ -528,12 +523,14 @@ ; P9BE-NEXT: srwi r6, r6, 6 ; P9BE-NEXT: mulli r7, r6, 95 ; P9BE-NEXT: sub r3, r3, r7 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: mtvsrwz v4, r3 +; P9BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI2_0@toc@l +; P9BE-NEXT: lxvx v5, 0, r3 ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: vmrghh v3, v4, v3 ; P9BE-NEXT: clrlwi r7, r3, 16 +; P9BE-NEXT: vperm v3, v4, v3, v5 ; P9BE-NEXT: mulhwu r8, r7, r5 ; P9BE-NEXT: sub r7, r7, r8 ; P9BE-NEXT: srwi r7, r7, 1 @@ -541,8 +538,7 @@ ; P9BE-NEXT: srwi r7, r7, 6 ; P9BE-NEXT: mulli r8, r7, 95 ; P9BE-NEXT: sub r3, r3, r8 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: mtvsrwz v4, r3 ; P9BE-NEXT: li r3, 0 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: clrlwi r3, r3, 16 @@ -552,21 +548,16 @@ ; P9BE-NEXT: add r5, r8, r5 ; P9BE-NEXT: srwi r5, r5, 6 ; P9BE-NEXT: mulli r8, r5, 95 +; P9BE-NEXT: mtvsrwz v0, r5 ; P9BE-NEXT: sub r3, r3, r8 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v2, r3 -; P9BE-NEXT: sldi r3, r4, 48 -; P9BE-NEXT: vmrghh v2, v2, v4 +; P9BE-NEXT: mtvsrwz v2, r3 +; P9BE-NEXT: vperm v2, v2, v4, v5 +; P9BE-NEXT: mtvsrwz v4, r6 ; P9BE-NEXT: vmrghw v2, v2, v3 -; P9BE-NEXT: mtvsrd v3, r3 -; P9BE-NEXT: sldi r3, r6, 48 -; P9BE-NEXT: mtvsrd v4, r3 -; P9BE-NEXT: sldi r3, r7, 48 -; P9BE-NEXT: vmrghh v3, v4, v3 -; P9BE-NEXT: mtvsrd v4, r3 -; P9BE-NEXT: sldi r3, r5, 48 -; P9BE-NEXT: mtvsrd v5, r3 -; P9BE-NEXT: vmrghh v4, v5, v4 +; P9BE-NEXT: mtvsrwz v3, r4 +; P9BE-NEXT: vperm v3, v4, v3, v5 +; P9BE-NEXT: mtvsrwz v4, r7 +; P9BE-NEXT: vperm v4, v0, v4, v5 ; P9BE-NEXT: vmrghw v3, v4, v3 ; P9BE-NEXT: vadduhm v2, v2, v3 ; P9BE-NEXT: blr @@ -634,68 +625,63 @@ ; ; P8BE-LABEL: combine_urem_udiv: ; P8BE: # %bb.0: -; P8BE-NEXT: mfvsrd r5, v2 -; P8BE-NEXT: lis r4, 22765 -; P8BE-NEXT: ori r4, r4, 8969 -; P8BE-NEXT: clrldi r3, 
r5, 48 -; P8BE-NEXT: rldicl r6, r5, 48, 48 -; P8BE-NEXT: clrlwi r8, r3, 16 -; P8BE-NEXT: rldicl r7, r5, 32, 48 +; P8BE-NEXT: mfvsrd r4, v2 +; P8BE-NEXT: lis r3, 22765 +; P8BE-NEXT: ori r3, r3, 8969 +; P8BE-NEXT: clrldi r5, r4, 48 +; P8BE-NEXT: rldicl r6, r4, 48, 48 +; P8BE-NEXT: clrlwi r8, r5, 16 ; P8BE-NEXT: clrlwi r9, r6, 16 -; P8BE-NEXT: rldicl r5, r5, 16, 48 -; P8BE-NEXT: mulhwu r10, r8, r4 +; P8BE-NEXT: rldicl r7, r4, 32, 48 +; P8BE-NEXT: rldicl r4, r4, 16, 48 +; P8BE-NEXT: mulhwu r10, r8, r3 +; P8BE-NEXT: mulhwu r12, r9, r3 ; P8BE-NEXT: clrlwi r11, r7, 16 -; P8BE-NEXT: mulhwu r12, r9, r4 -; P8BE-NEXT: clrlwi r5, r5, 16 -; P8BE-NEXT: mulhwu r0, r11, r4 -; P8BE-NEXT: mulhwu r4, r5, r4 +; P8BE-NEXT: clrlwi r4, r4, 16 +; P8BE-NEXT: mulhwu r0, r11, r3 +; P8BE-NEXT: mulhwu r3, r4, r3 ; P8BE-NEXT: sub r8, r8, r10 ; P8BE-NEXT: sub r9, r9, r12 ; P8BE-NEXT: srwi r8, r8, 1 -; P8BE-NEXT: add r8, r8, r10 -; P8BE-NEXT: sub r10, r11, r0 ; P8BE-NEXT: srwi r9, r9, 1 -; P8BE-NEXT: sub r11, r5, r4 +; P8BE-NEXT: sub r11, r11, r0 +; P8BE-NEXT: add r8, r8, r10 ; P8BE-NEXT: add r9, r9, r12 -; P8BE-NEXT: srwi r8, r8, 6 +; P8BE-NEXT: sub r12, r4, r3 +; P8BE-NEXT: addis r10, r2, .LCPI2_0@toc@ha ; P8BE-NEXT: srwi r11, r11, 1 -; P8BE-NEXT: srwi r10, r10, 1 +; P8BE-NEXT: srwi r8, r8, 6 +; P8BE-NEXT: srwi r12, r12, 1 ; P8BE-NEXT: srwi r9, r9, 6 -; P8BE-NEXT: mulli r12, r8, 95 -; P8BE-NEXT: add r4, r11, r4 -; P8BE-NEXT: add r10, r10, r0 -; P8BE-NEXT: mulli r11, r9, 95 -; P8BE-NEXT: srwi r4, r4, 6 -; P8BE-NEXT: srwi r10, r10, 6 -; P8BE-NEXT: sldi r9, r9, 48 -; P8BE-NEXT: sldi r8, r8, 48 -; P8BE-NEXT: mtvsrd v3, r9 -; P8BE-NEXT: mulli r9, r4, 95 -; P8BE-NEXT: mtvsrd v2, r8 -; P8BE-NEXT: mulli r8, r10, 95 -; P8BE-NEXT: sub r3, r3, r12 -; P8BE-NEXT: sub r6, r6, r11 -; P8BE-NEXT: sldi r3, r3, 48 -; P8BE-NEXT: vmrghh v2, v3, v2 -; P8BE-NEXT: sldi r6, r6, 48 -; P8BE-NEXT: sldi r10, r10, 48 -; P8BE-NEXT: mtvsrd v3, r3 -; P8BE-NEXT: sub r3, r5, r9 -; P8BE-NEXT: sub r7, r7, r8 -; P8BE-NEXT: mtvsrd 
v5, r6 -; P8BE-NEXT: sldi r3, r3, 48 -; P8BE-NEXT: sldi r5, r7, 48 -; P8BE-NEXT: mtvsrd v1, r3 -; P8BE-NEXT: sldi r3, r4, 48 -; P8BE-NEXT: mtvsrd v4, r10 -; P8BE-NEXT: mtvsrd v0, r5 -; P8BE-NEXT: vmrghh v3, v5, v3 -; P8BE-NEXT: mtvsrd v5, r3 -; P8BE-NEXT: vmrghh v0, v1, v0 -; P8BE-NEXT: vmrghh v4, v5, v4 -; P8BE-NEXT: vmrghw v3, v0, v3 -; P8BE-NEXT: vmrghw v2, v4, v2 -; P8BE-NEXT: vadduhm v2, v3, v2 +; P8BE-NEXT: addi r10, r10, .LCPI2_0@toc@l +; P8BE-NEXT: add r11, r11, r0 +; P8BE-NEXT: mulli r0, r8, 95 +; P8BE-NEXT: add r3, r12, r3 +; P8BE-NEXT: mtvsrwz v3, r8 +; P8BE-NEXT: lxvw4x v2, 0, r10 +; P8BE-NEXT: srwi r10, r11, 6 +; P8BE-NEXT: mulli r8, r9, 95 +; P8BE-NEXT: srwi r3, r3, 6 +; P8BE-NEXT: mtvsrwz v4, r9 +; P8BE-NEXT: mulli r9, r10, 95 +; P8BE-NEXT: mtvsrwz v5, r10 +; P8BE-NEXT: mulli r10, r3, 95 +; P8BE-NEXT: vperm v3, v4, v3, v2 +; P8BE-NEXT: sub r5, r5, r0 +; P8BE-NEXT: sub r6, r6, r8 +; P8BE-NEXT: mtvsrwz v4, r5 +; P8BE-NEXT: mtvsrwz v0, r6 +; P8BE-NEXT: sub r5, r7, r9 +; P8BE-NEXT: sub r4, r4, r10 +; P8BE-NEXT: mtvsrwz v1, r5 +; P8BE-NEXT: mtvsrwz v6, r4 +; P8BE-NEXT: vperm v4, v0, v4, v2 +; P8BE-NEXT: mtvsrwz v0, r3 +; P8BE-NEXT: vperm v1, v6, v1, v2 +; P8BE-NEXT: vperm v2, v0, v5, v2 +; P8BE-NEXT: vmrghw v4, v1, v4 +; P8BE-NEXT: vmrghw v2, v2, v3 +; P8BE-NEXT: vadduhm v2, v4, v2 ; P8BE-NEXT: blr %1 = urem <4 x i16> %x, %2 = udiv <4 x i16> %x, @@ -744,17 +730,18 @@ ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: ori r4, r4, 8969 ; P9BE-NEXT: clrlwi r3, r3, 27 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v3, r3 +; P9BE-NEXT: mtvsrwz v3, r3 ; P9BE-NEXT: li r3, 0 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: clrlwi r3, r3, 26 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: mtvsrwz v4, r3 +; P9BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI3_0@toc@l +; P9BE-NEXT: lxvx v5, 0, r3 ; P9BE-NEXT: li r3, 6 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: vmrghh v3, v4, v3 ; P9BE-NEXT: clrlwi r3, r3, 16 +; 
P9BE-NEXT: vperm v3, v4, v3, v5 ; P9BE-NEXT: mulhwu r4, r3, r4 ; P9BE-NEXT: sub r5, r3, r4 ; P9BE-NEXT: srwi r5, r5, 1 @@ -762,14 +749,12 @@ ; P9BE-NEXT: srwi r4, r4, 6 ; P9BE-NEXT: mulli r4, r4, 95 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: mtvsrwz v4, r3 ; P9BE-NEXT: li r3, 4 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: clrlwi r3, r3, 29 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v2, r3 -; P9BE-NEXT: vmrghh v2, v2, v4 +; P9BE-NEXT: mtvsrwz v2, r3 +; P9BE-NEXT: vperm v2, v2, v4, v5 ; P9BE-NEXT: vmrghw v2, v3, v2 ; P9BE-NEXT: blr ; @@ -807,32 +792,31 @@ ; P8BE: # %bb.0: ; P8BE-NEXT: mfvsrd r4, v2 ; P8BE-NEXT: lis r3, 22765 +; P8BE-NEXT: addis r7, r2, .LCPI3_0@toc@ha ; P8BE-NEXT: ori r3, r3, 8969 ; P8BE-NEXT: clrldi r5, r4, 48 -; P8BE-NEXT: rldicl r7, r4, 16, 48 +; P8BE-NEXT: rldicl r8, r4, 16, 48 ; P8BE-NEXT: clrlwi r5, r5, 16 -; P8BE-NEXT: clrlwi r7, r7, 26 ; P8BE-NEXT: mulhwu r3, r5, r3 ; P8BE-NEXT: sub r6, r5, r3 ; P8BE-NEXT: srwi r6, r6, 1 ; P8BE-NEXT: add r3, r6, r3 ; P8BE-NEXT: rldicl r6, r4, 32, 48 ; P8BE-NEXT: srwi r3, r3, 6 -; P8BE-NEXT: rldicl r4, r4, 48, 48 ; P8BE-NEXT: clrlwi r6, r6, 27 ; P8BE-NEXT: mulli r3, r3, 95 -; P8BE-NEXT: sldi r6, r6, 48 +; P8BE-NEXT: mtvsrwz v2, r6 +; P8BE-NEXT: addi r6, r7, .LCPI3_0@toc@l +; P8BE-NEXT: rldicl r4, r4, 48, 48 +; P8BE-NEXT: clrlwi r7, r8, 26 +; P8BE-NEXT: lxvw4x v3, 0, r6 ; P8BE-NEXT: clrlwi r4, r4, 29 -; P8BE-NEXT: mtvsrd v2, r6 -; P8BE-NEXT: sldi r6, r7, 48 -; P8BE-NEXT: sldi r4, r4, 48 -; P8BE-NEXT: mtvsrd v3, r6 -; P8BE-NEXT: mtvsrd v5, r4 +; P8BE-NEXT: mtvsrwz v4, r7 +; P8BE-NEXT: mtvsrwz v0, r4 ; P8BE-NEXT: sub r3, r5, r3 -; P8BE-NEXT: vmrghh v2, v3, v2 -; P8BE-NEXT: sldi r3, r3, 48 -; P8BE-NEXT: mtvsrd v4, r3 -; P8BE-NEXT: vmrghh v3, v5, v4 +; P8BE-NEXT: vperm v2, v4, v2, v3 +; P8BE-NEXT: mtvsrwz v5, r3 +; P8BE-NEXT: vperm v3, v0, v5, v3 ; P8BE-NEXT: vmrghw v2, v2, v3 ; P8BE-NEXT: blr %1 = urem <4 x i16> %x, @@ -895,33 +879,32 @@ ; 
P9BE-NEXT: mulli r4, r4, 5423 ; P9BE-NEXT: sub r3, r3, r4 ; P9BE-NEXT: lis r4, -19946 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: ori r4, r4, 17097 -; P9BE-NEXT: mtvsrd v3, r3 +; P9BE-NEXT: mtvsrwz v3, r3 ; P9BE-NEXT: li r3, 4 +; P9BE-NEXT: ori r4, r4, 17097 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: clrlwi r3, r3, 16 ; P9BE-NEXT: mulhwu r4, r3, r4 ; P9BE-NEXT: srwi r4, r4, 4 ; P9BE-NEXT: mulli r4, r4, 23 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: mtvsrwz v4, r3 +; P9BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha +; P9BE-NEXT: addi r3, r3, .LCPI4_0@toc@l +; P9BE-NEXT: lxvx v5, 0, r3 ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: vmrghh v3, v4, v3 ; P9BE-NEXT: clrlwi r4, r3, 16 ; P9BE-NEXT: rlwinm r3, r3, 31, 17, 31 +; P9BE-NEXT: vperm v3, v4, v3, v5 ; P9BE-NEXT: mulhwu r3, r3, r5 ; P9BE-NEXT: srwi r3, r3, 8 ; P9BE-NEXT: mulli r3, r3, 654 ; P9BE-NEXT: sub r3, r4, r3 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v2, r3 +; P9BE-NEXT: mtvsrwz v2, r3 ; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v4, r3 -; P9BE-NEXT: vmrghh v2, v4, v2 +; P9BE-NEXT: mtvsrwz v4, r3 +; P9BE-NEXT: vperm v2, v4, v2, v5 ; P9BE-NEXT: vmrghw v2, v2, v3 ; P9BE-NEXT: blr ; @@ -970,40 +953,39 @@ ; P8BE-NEXT: lis r3, 24749 ; P8BE-NEXT: lis r7, -19946 ; P8BE-NEXT: lis r8, -14230 +; P8BE-NEXT: li r10, 0 ; P8BE-NEXT: ori r3, r3, 47143 ; P8BE-NEXT: ori r7, r7, 17097 ; P8BE-NEXT: ori r8, r8, 30865 +; P8BE-NEXT: mtvsrwz v2, r10 ; P8BE-NEXT: clrldi r5, r4, 48 ; P8BE-NEXT: rldicl r6, r4, 48, 48 -; P8BE-NEXT: rldicl r4, r4, 32, 48 ; P8BE-NEXT: clrlwi r5, r5, 16 +; P8BE-NEXT: rldicl r4, r4, 32, 48 ; P8BE-NEXT: clrlwi r6, r6, 16 ; P8BE-NEXT: mulhwu r3, r5, r3 ; P8BE-NEXT: rlwinm r9, r4, 31, 17, 31 -; P8BE-NEXT: clrlwi r4, r4, 16 ; P8BE-NEXT: mulhwu r7, r6, r7 ; P8BE-NEXT: mulhwu r8, r9, r8 -; P8BE-NEXT: li r9, 0 +; P8BE-NEXT: addis r9, r2, .LCPI4_0@toc@ha ; P8BE-NEXT: srwi r3, r3, 11 -; P8BE-NEXT: srwi r7, 
r7, 4 ; P8BE-NEXT: mulli r3, r3, 5423 +; P8BE-NEXT: srwi r7, r7, 4 ; P8BE-NEXT: srwi r8, r8, 8 ; P8BE-NEXT: mulli r7, r7, 23 ; P8BE-NEXT: mulli r8, r8, 654 ; P8BE-NEXT: sub r3, r5, r3 -; P8BE-NEXT: sldi r5, r9, 48 -; P8BE-NEXT: mtvsrd v2, r5 +; P8BE-NEXT: addi r5, r9, .LCPI4_0@toc@l +; P8BE-NEXT: mtvsrwz v4, r3 +; P8BE-NEXT: clrlwi r3, r4, 16 +; P8BE-NEXT: lxvw4x v3, 0, r5 ; P8BE-NEXT: sub r5, r6, r7 -; P8BE-NEXT: sldi r3, r3, 48 -; P8BE-NEXT: sub r4, r4, r8 -; P8BE-NEXT: sldi r5, r5, 48 -; P8BE-NEXT: mtvsrd v3, r3 -; P8BE-NEXT: sldi r3, r4, 48 -; P8BE-NEXT: mtvsrd v4, r5 -; P8BE-NEXT: mtvsrd v5, r3 -; P8BE-NEXT: vmrghh v3, v4, v3 -; P8BE-NEXT: vmrghh v2, v2, v5 -; P8BE-NEXT: vmrghw v2, v2, v3 +; P8BE-NEXT: sub r3, r3, r8 +; P8BE-NEXT: mtvsrwz v5, r5 +; P8BE-NEXT: mtvsrwz v0, r3 +; P8BE-NEXT: vperm v4, v5, v4, v3 +; P8BE-NEXT: vperm v2, v2, v0, v3 +; P8BE-NEXT: vmrghw v2, v2, v4 ; P8BE-NEXT: blr %1 = urem <4 x i16> %x, ret <4 x i16> %1 diff --git a/llvm/test/CodeGen/PowerPC/vec-itofp.ll b/llvm/test/CodeGen/PowerPC/vec-itofp.ll --- a/llvm/test/CodeGen/PowerPC/vec-itofp.ll +++ b/llvm/test/CodeGen/PowerPC/vec-itofp.ll @@ -86,7 +86,7 @@ ; CHECK-BE-NEXT: lxvx v3, 0, r4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI0_1@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI0_1@toc@l -; CHECK-BE-NEXT: vperm v3, v2, v4, v3 +; CHECK-BE-NEXT: vperm v3, v4, v2, v3 ; CHECK-BE-NEXT: xvcvuxddp vs0, v3 ; CHECK-BE-NEXT: lxvx v3, 0, r4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI0_2@toc@ha @@ -162,7 +162,7 @@ ; CHECK-BE-NEXT: lxvx v3, 0, r4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI1_1@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI1_1@toc@l -; CHECK-BE-NEXT: vperm v3, v2, v4, v3 +; CHECK-BE-NEXT: vperm v3, v4, v2, v3 ; CHECK-BE-NEXT: xvcvuxddp vs0, v3 ; CHECK-BE-NEXT: lxvx v3, 0, r4 ; CHECK-BE-NEXT: vperm v2, v4, v2, v3 @@ -210,7 +210,7 @@ ; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: addi r4, r4, .LCPI2_0@toc@l ; CHECK-BE-NEXT: lxvx v3, 0, r4 -; CHECK-BE-NEXT: vperm v2, v2, v4, v3 +; CHECK-BE-NEXT: vperm v2, v4, 
v2, v3 ; CHECK-BE-NEXT: xvcvuxddp vs0, v2 ; CHECK-BE-NEXT: stxv vs0, 0(r3) ; CHECK-BE-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll @@ -49,19 +49,20 @@ ; CHECK-BE-LABEL: test2elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mtfprd f0, r3 +; CHECK-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l +; CHECK-BE-NEXT: lxvx v2, 0, r3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v2, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: li r3, 0 -; CHECK-BE-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-NEXT: vperm v2, v3, v4, v2 ; CHECK-BE-NEXT: vextuwlx r3, r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -130,31 +131,30 @@ ; CHECK-BE-LABEL: test4elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; CHECK-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l ; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: lxvx v3, 0, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxswapd vs0, v2 -; CHECK-BE-NEXT: sldi r3, r3, 48 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xscvspdpn f0, v2 -; CHECK-BE-NEXT: sldi r3, r3, 48 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: 
vmrghh v3, v4, v3 +; CHECK-BE-NEXT: vperm v4, v5, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 1 -; CHECK-BE-NEXT: sldi r3, r3, 48 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v2, r3 -; CHECK-BE-NEXT: vmrghh v2, v4, v2 -; CHECK-BE-NEXT: vmrghw v2, v2, v3 +; CHECK-BE-NEXT: mtvsrwz v2, r3 +; CHECK-BE-NEXT: vperm v2, v5, v2, v3 +; CHECK-BE-NEXT: vmrghw v2, v2, v4 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -271,59 +271,54 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; CHECK-BE-NEXT: addi r3, r3, .LCPI2_0@toc@l +; CHECK-BE-NEXT: lxvx v2, 0, r3 ; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs1 -; CHECK-BE-NEXT: sldi r3, r3, 48 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: mtvsrd v2, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xscvspdpn f2, vs1 ; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: vperm v3, v4, v3, v2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-BE-NEXT: sldi r3, r3, 48 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; 
CHECK-BE-NEXT: vperm v4, v4, v5, v2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghh v3, v3, v4 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 +; CHECK-BE-NEXT: vmrghw v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: sldi r3, r3, 48 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: vperm v4, v5, v4, v2 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v5, r3 -; CHECK-BE-NEXT: vmrghh v4, v4, v5 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: xxmrghd v2, v3, v2 +; CHECK-BE-NEXT: mtvsrwz v0, r3 +; CHECK-BE-NEXT: vperm v2, v5, v0, v2 +; CHECK-BE-NEXT: vmrghw v2, v2, v4 +; CHECK-BE-NEXT: xxmrghd v2, v2, v3 ; CHECK-BE-NEXT: blr entry: %a = load <8 x float>, <8 x float>* %0, align 32 @@ -537,117 +532,104 @@ ; ; CHECK-BE-LABEL: test16elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs1, 16(r4) -; CHECK-BE-NEXT: lxv vs0, 0(r4) -; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-BE-NEXT: xxswapd vs3, vs1 -; CHECK-BE-NEXT: xscvspdpn f4, vs1 -; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: lxv vs2, 16(r4) +; CHECK-BE-NEXT: addis r5, r2, .LCPI3_0@toc@ha +; CHECK-BE-NEXT: lxv vs1, 0(r4) +; CHECK-BE-NEXT: lxv vs0, 48(r4) +; CHECK-BE-NEXT: addi r5, r5, .LCPI3_0@toc@l +; CHECK-BE-NEXT: lxvx v2, 0, r5 +; CHECK-BE-NEXT: xxsldwi 
vs3, vs2, vs2, 3 +; CHECK-BE-NEXT: xxswapd vs4, vs2 +; CHECK-BE-NEXT: xscvspdpn f5, vs2 +; CHECK-BE-NEXT: xxsldwi vs2, vs2, vs2, 1 +; CHECK-BE-NEXT: xxsldwi vs6, vs1, vs1, 3 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: xscvspdpn f1, vs1 +; CHECK-BE-NEXT: xscvspdpn f4, vs4 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r5, f3 +; CHECK-BE-NEXT: xxswapd vs3, vs1 +; CHECK-BE-NEXT: mtvsrwz v3, r5 +; CHECK-BE-NEXT: mffprwz r5, f4 +; CHECK-BE-NEXT: xscvdpsxws f4, f5 +; CHECK-BE-NEXT: xscvspdpn f3, vs3 +; CHECK-BE-NEXT: mtvsrwz v4, r5 +; CHECK-BE-NEXT: vperm v3, v4, v3, v2 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mffprwz r5, f4 +; CHECK-BE-NEXT: xscvspdpn f4, vs6 +; CHECK-BE-NEXT: mtvsrwz v4, r5 ; CHECK-BE-NEXT: mffprwz r5, f2 -; CHECK-BE-NEXT: xxsldwi vs2, vs0, vs0, 3 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: mtvsrd v2, r5 -; CHECK-BE-NEXT: mffprwz r5, f3 -; CHECK-BE-NEXT: xscvdpsxws f3, f4 -; CHECK-BE-NEXT: sldi r5, r5, 48 +; CHECK-BE-NEXT: xscvspdpn f2, vs1 +; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: mtvsrwz v5, r5 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mtvsrd v3, r5 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 +; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: vmrghw v3, v4, v3 +; CHECK-BE-NEXT: mffprwz r5, f4 +; CHECK-BE-NEXT: mtvsrwz v5, r5 ; CHECK-BE-NEXT: mffprwz r5, f3 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: mtvsrd v3, r5 +; CHECK-BE-NEXT: xxsldwi vs3, vs0, vs0, 3 +; CHECK-BE-NEXT: mtvsrwz v0, r5 +; CHECK-BE-NEXT: mffprwz r5, f2 +; CHECK-BE-NEXT: xscvspdpn f2, vs3 +; CHECK-BE-NEXT: vperm v5, v0, v5, v2 +; CHECK-BE-NEXT: mtvsrwz v0, r5 ; CHECK-BE-NEXT: mffprwz r5, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 
-; CHECK-BE-NEXT: sldi r5, r5, 48 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mtvsrwz v1, r5 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v4, r5 -; CHECK-BE-NEXT: mffprwz r5, f2 +; CHECK-BE-NEXT: vperm v0, v0, v1, v2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: vmrghh v3, v3, v4 -; CHECK-BE-NEXT: mtvsrd v4, r5 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 -; CHECK-BE-NEXT: mffprwz r5, f1 -; CHECK-BE-NEXT: xscvspdpn f1, vs0 +; CHECK-BE-NEXT: vmrghw v4, v0, v5 +; CHECK-BE-NEXT: mffprwz r5, f2 +; CHECK-BE-NEXT: xscvspdpn f2, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mtvsrwz v1, r5 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: mtvsrd v5, r5 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vmrghh v4, v5, v4 -; CHECK-BE-NEXT: mffprwz r5, f1 -; CHECK-BE-NEXT: lxv vs1, 48(r4) -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: mtvsrd v5, r5 -; CHECK-BE-NEXT: mffprwz r5, f0 -; CHECK-BE-NEXT: lxv vs0, 32(r4) -; CHECK-BE-NEXT: xscvspdpn f5, vs1 -; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-BE-NEXT: xxswapd vs3, vs1 -; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: xscvdpsxws f5, f5 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: mtvsrd v0, r5 -; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: vmrghh v5, v5, v0 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghw v3, v5, v4 -; CHECK-BE-NEXT: mffprwz r4, f5 -; CHECK-BE-NEXT: xxmrghd vs4, v3, v2 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: mtvsrd v2, r4 +; CHECK-BE-NEXT: mffprwz r5, f1 +; CHECK-BE-NEXT: lxv vs1, 32(r4) +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mtvsrwz v6, r5 +; CHECK-BE-NEXT: vperm v1, v6, v1, v2 ; CHECK-BE-NEXT: mffprwz r4, f2 -; CHECK-BE-NEXT: stxv vs4, 0(r3) -; 
CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: mtvsrd v3, r4 -; CHECK-BE-NEXT: mffprwz r4, f3 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: mtvsrd v4, r4 -; CHECK-BE-NEXT: mffprwz r4, f1 -; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v4, r4 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghh v2, v2, v4 -; CHECK-BE-NEXT: vmrghw v2, v2, v3 -; CHECK-BE-NEXT: mffprwz r4, f1 -; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v3, r4 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r4, f1 -; CHECK-BE-NEXT: xscvspdpn f1, vs0 -; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: xxmrghd vs2, v4, v3 +; CHECK-BE-NEXT: mtvsrwz v5, r4 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: xxsldwi vs0, vs1, vs1, 3 +; CHECK-BE-NEXT: stxv vs2, 0(r3) +; CHECK-BE-NEXT: mtvsrwz v3, r4 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: mtvsrd v4, r4 +; CHECK-BE-NEXT: vperm v3, v5, v3, v2 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: mffprwz r4, f1 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: mtvsrd v4, r4 +; CHECK-BE-NEXT: vmrghw v3, v3, v1 ; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: mtvsrd v5, r4 -; CHECK-BE-NEXT: vmrghh v4, v4, v5 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: xxmrghd vs0, v3, v2 +; CHECK-BE-NEXT: xxswapd vs0, vs1 +; CHECK-BE-NEXT: mtvsrwz v4, r4 +; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: xscvspdpn f0, vs1 +; CHECK-BE-NEXT: mtvsrwz v5, r4 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: vperm v4, v5, v4, v2 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: xxsldwi vs0, vs1, vs1, 1 
+; CHECK-BE-NEXT: mtvsrwz v5, r4 +; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: mtvsrwz v0, r4 +; CHECK-BE-NEXT: vperm v2, v5, v0, v2 +; CHECK-BE-NEXT: vmrghw v2, v2, v4 +; CHECK-BE-NEXT: xxmrghd vs0, v2, v3 ; CHECK-BE-NEXT: stxv vs0, 16(r3) ; CHECK-BE-NEXT: blr entry: @@ -697,19 +679,20 @@ ; CHECK-BE-LABEL: test2elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mtfprd f0, r3 +; CHECK-BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: addi r3, r3, .LCPI4_0@toc@l +; CHECK-BE-NEXT: lxvx v2, 0, r3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v2, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: li r3, 0 -; CHECK-BE-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-NEXT: vperm v2, v3, v4, v2 ; CHECK-BE-NEXT: vextuwlx r3, r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -778,31 +761,30 @@ ; CHECK-BE-LABEL: test4elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-BE-NEXT: addis r3, r2, .LCPI5_0@toc@ha +; CHECK-BE-NEXT: addi r3, r3, .LCPI5_0@toc@l ; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: lxvx v3, 0, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxswapd vs0, v2 -; CHECK-BE-NEXT: sldi r3, r3, 48 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xscvspdpn f0, v2 -; CHECK-BE-NEXT: sldi r3, r3, 48 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; 
CHECK-BE-NEXT: vmrghh v3, v4, v3 +; CHECK-BE-NEXT: vperm v4, v5, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 1 -; CHECK-BE-NEXT: sldi r3, r3, 48 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v2, r3 -; CHECK-BE-NEXT: vmrghh v2, v4, v2 -; CHECK-BE-NEXT: vmrghw v2, v2, v3 +; CHECK-BE-NEXT: mtvsrwz v2, r3 +; CHECK-BE-NEXT: vperm v2, v5, v2, v3 +; CHECK-BE-NEXT: vmrghw v2, v2, v4 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -919,59 +901,54 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: addis r3, r2, .LCPI6_0@toc@ha +; CHECK-BE-NEXT: addi r3, r3, .LCPI6_0@toc@l +; CHECK-BE-NEXT: lxvx v2, 0, r3 ; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs1 -; CHECK-BE-NEXT: sldi r3, r3, 48 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: mtvsrd v2, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xscvspdpn f2, vs1 ; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: vperm v3, v4, v3, v2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-BE-NEXT: sldi r3, r3, 48 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd 
v4, r3 +; CHECK-BE-NEXT: vperm v4, v4, v5, v2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghh v3, v3, v4 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 +; CHECK-BE-NEXT: vmrghw v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: sldi r3, r3, 48 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: vperm v4, v5, v4, v2 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v5, r3 -; CHECK-BE-NEXT: vmrghh v4, v4, v5 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: xxmrghd v2, v3, v2 +; CHECK-BE-NEXT: mtvsrwz v0, r3 +; CHECK-BE-NEXT: vperm v2, v5, v0, v2 +; CHECK-BE-NEXT: vmrghw v2, v2, v4 +; CHECK-BE-NEXT: xxmrghd v2, v2, v3 ; CHECK-BE-NEXT: blr entry: %a = load <8 x float>, <8 x float>* %0, align 32 @@ -1185,117 +1162,104 @@ ; ; CHECK-BE-LABEL: test16elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs1, 16(r4) -; CHECK-BE-NEXT: lxv vs0, 0(r4) -; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-BE-NEXT: xxswapd vs3, vs1 -; CHECK-BE-NEXT: xscvspdpn f4, vs1 -; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: lxv vs2, 16(r4) +; CHECK-BE-NEXT: addis r5, r2, .LCPI7_0@toc@ha +; CHECK-BE-NEXT: lxv vs1, 0(r4) +; CHECK-BE-NEXT: lxv vs0, 48(r4) +; CHECK-BE-NEXT: addi r5, r5, .LCPI7_0@toc@l +; CHECK-BE-NEXT: lxvx v2, 0, r5 +; 
CHECK-BE-NEXT: xxsldwi vs3, vs2, vs2, 3 +; CHECK-BE-NEXT: xxswapd vs4, vs2 +; CHECK-BE-NEXT: xscvspdpn f5, vs2 +; CHECK-BE-NEXT: xxsldwi vs2, vs2, vs2, 1 +; CHECK-BE-NEXT: xxsldwi vs6, vs1, vs1, 3 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: xscvspdpn f1, vs1 +; CHECK-BE-NEXT: xscvspdpn f4, vs4 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r5, f3 +; CHECK-BE-NEXT: xxswapd vs3, vs1 +; CHECK-BE-NEXT: mtvsrwz v3, r5 +; CHECK-BE-NEXT: mffprwz r5, f4 +; CHECK-BE-NEXT: xscvdpsxws f4, f5 +; CHECK-BE-NEXT: xscvspdpn f3, vs3 +; CHECK-BE-NEXT: mtvsrwz v4, r5 +; CHECK-BE-NEXT: vperm v3, v4, v3, v2 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mffprwz r5, f4 +; CHECK-BE-NEXT: xscvspdpn f4, vs6 +; CHECK-BE-NEXT: mtvsrwz v4, r5 ; CHECK-BE-NEXT: mffprwz r5, f2 -; CHECK-BE-NEXT: xxsldwi vs2, vs0, vs0, 3 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: mtvsrd v2, r5 -; CHECK-BE-NEXT: mffprwz r5, f3 -; CHECK-BE-NEXT: xscvdpsxws f3, f4 -; CHECK-BE-NEXT: sldi r5, r5, 48 +; CHECK-BE-NEXT: xscvspdpn f2, vs1 +; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: mtvsrwz v5, r5 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mtvsrd v3, r5 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 +; CHECK-BE-NEXT: vperm v4, v4, v5, v2 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: vmrghw v3, v4, v3 +; CHECK-BE-NEXT: mffprwz r5, f4 +; CHECK-BE-NEXT: mtvsrwz v5, r5 ; CHECK-BE-NEXT: mffprwz r5, f3 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: mtvsrd v3, r5 +; CHECK-BE-NEXT: xxsldwi vs3, vs0, vs0, 3 +; CHECK-BE-NEXT: mtvsrwz v0, r5 +; CHECK-BE-NEXT: mffprwz r5, f2 +; CHECK-BE-NEXT: xscvspdpn f2, vs3 +; CHECK-BE-NEXT: vperm v5, v0, v5, v2 +; CHECK-BE-NEXT: mtvsrwz v0, r5 ; CHECK-BE-NEXT: mffprwz r5, f1 ; 
CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: sldi r5, r5, 48 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mtvsrwz v1, r5 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v4, r5 -; CHECK-BE-NEXT: mffprwz r5, f2 +; CHECK-BE-NEXT: vperm v0, v0, v1, v2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: vmrghh v3, v3, v4 -; CHECK-BE-NEXT: mtvsrd v4, r5 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 -; CHECK-BE-NEXT: mffprwz r5, f1 -; CHECK-BE-NEXT: xscvspdpn f1, vs0 +; CHECK-BE-NEXT: vmrghw v4, v0, v5 +; CHECK-BE-NEXT: mffprwz r5, f2 +; CHECK-BE-NEXT: xscvspdpn f2, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mtvsrwz v1, r5 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: mtvsrd v5, r5 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vmrghh v4, v5, v4 -; CHECK-BE-NEXT: mffprwz r5, f1 -; CHECK-BE-NEXT: lxv vs1, 48(r4) -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: mtvsrd v5, r5 -; CHECK-BE-NEXT: mffprwz r5, f0 -; CHECK-BE-NEXT: lxv vs0, 32(r4) -; CHECK-BE-NEXT: xscvspdpn f5, vs1 -; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-BE-NEXT: xxswapd vs3, vs1 -; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: xscvdpsxws f5, f5 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: mtvsrd v0, r5 -; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: vmrghh v5, v5, v0 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghw v3, v5, v4 -; CHECK-BE-NEXT: mffprwz r4, f5 -; CHECK-BE-NEXT: xxmrghd vs4, v3, v2 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: mtvsrd v2, r4 +; CHECK-BE-NEXT: mffprwz r5, f1 +; CHECK-BE-NEXT: lxv vs1, 32(r4) +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mtvsrwz v6, r5 +; CHECK-BE-NEXT: vperm v1, v6, v1, v2 ; CHECK-BE-NEXT: mffprwz r4, f2 -; 
CHECK-BE-NEXT: stxv vs4, 0(r3) -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: mtvsrd v3, r4 -; CHECK-BE-NEXT: mffprwz r4, f3 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: mtvsrd v4, r4 -; CHECK-BE-NEXT: mffprwz r4, f1 -; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v4, r4 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghh v2, v2, v4 -; CHECK-BE-NEXT: vmrghw v2, v2, v3 -; CHECK-BE-NEXT: mffprwz r4, f1 -; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v3, r4 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r4, f1 -; CHECK-BE-NEXT: xscvspdpn f1, vs0 -; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: xxmrghd vs2, v4, v3 +; CHECK-BE-NEXT: mtvsrwz v5, r4 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: xxsldwi vs0, vs1, vs1, 3 +; CHECK-BE-NEXT: stxv vs2, 0(r3) +; CHECK-BE-NEXT: mtvsrwz v3, r4 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: mtvsrd v4, r4 +; CHECK-BE-NEXT: vperm v3, v5, v3, v2 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: mffprwz r4, f1 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: mtvsrd v4, r4 +; CHECK-BE-NEXT: vmrghw v3, v3, v1 ; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: mtvsrd v5, r4 -; CHECK-BE-NEXT: vmrghh v4, v4, v5 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: xxmrghd vs0, v3, v2 +; CHECK-BE-NEXT: xxswapd vs0, vs1 +; CHECK-BE-NEXT: mtvsrwz v4, r4 +; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: xscvspdpn f0, vs1 +; CHECK-BE-NEXT: mtvsrwz v5, r4 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: vperm v4, v5, v4, v2 +; CHECK-BE-NEXT: mffprwz r4, f0 +; 
CHECK-BE-NEXT: xxsldwi vs0, vs1, vs1, 1 +; CHECK-BE-NEXT: mtvsrwz v5, r4 +; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: mtvsrwz v0, r4 +; CHECK-BE-NEXT: vperm v2, v5, v0, v2 +; CHECK-BE-NEXT: vmrghw v2, v2, v4 +; CHECK-BE-NEXT: xxmrghd vs0, v2, v3 ; CHECK-BE-NEXT: stxv vs0, 16(r3) ; CHECK-BE-NEXT: blr entry: diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll @@ -54,19 +54,20 @@ ; CHECK-BE-LABEL: test2elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mtfprd f0, r3 +; CHECK-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l +; CHECK-BE-NEXT: lxvx v2, 0, r3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v2, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: addi r3, r1, -2 -; CHECK-BE-NEXT: vmrghb v2, v2, v3 +; CHECK-BE-NEXT: vperm v2, v3, v4, v2 ; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10 ; CHECK-BE-NEXT: stxsihx v2, 0, r3 ; CHECK-BE-NEXT: lhz r3, -2(r1) @@ -138,32 +139,31 @@ ; CHECK-BE-LABEL: test4elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; CHECK-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l ; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: lxvx v3, 0, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxswapd vs0, v2 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: 
mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xscvspdpn f0, v2 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: vmrghb v3, v4, v3 +; CHECK-BE-NEXT: vperm v4, v5, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 1 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v2, r3 +; CHECK-BE-NEXT: mtvsrwz v2, r3 ; CHECK-BE-NEXT: li r3, 0 -; CHECK-BE-NEXT: vmrghb v2, v4, v2 -; CHECK-BE-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-NEXT: vperm v2, v5, v2, v3 +; CHECK-BE-NEXT: vmrghh v2, v2, v4 ; CHECK-BE-NEXT: vextuwlx r3, r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -283,59 +283,54 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; CHECK-BE-NEXT: addi r3, r3, .LCPI2_0@toc@l +; CHECK-BE-NEXT: lxvx v2, 0, r3 ; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs1 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: mtvsrd v2, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xscvspdpn f2, vs1 ; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: vperm v3, v4, v3, v2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: 
vmrghb v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: vperm v4, v4, v5, v2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghb v3, v3, v4 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-NEXT: vmrghh v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: vperm v4, v5, v4, v2 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vmrghb v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v5, r3 -; CHECK-BE-NEXT: vmrghb v4, v4, v5 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 +; CHECK-BE-NEXT: mtvsrwz v0, r3 +; CHECK-BE-NEXT: vperm v2, v5, v0, v2 +; CHECK-BE-NEXT: vmrghh v2, v2, v4 +; CHECK-BE-NEXT: vmrghw v2, v2, v3 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -553,113 +548,100 @@ ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha +; CHECK-BE-NEXT: addi r3, r3, .LCPI3_0@toc@l +; CHECK-BE-NEXT: lxvx v2, 0, r3 ; 
CHECK-BE-NEXT: xxsldwi vs4, vs3, vs3, 3 ; CHECK-BE-NEXT: xscvspdpn f4, vs4 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 ; CHECK-BE-NEXT: mffprwz r3, f4 ; CHECK-BE-NEXT: xxswapd vs4, vs3 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: xscvspdpn f4, vs4 -; CHECK-BE-NEXT: mtvsrd v2, r3 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 ; CHECK-BE-NEXT: mffprwz r3, f4 ; CHECK-BE-NEXT: xscvspdpn f4, vs3 ; CHECK-BE-NEXT: xxsldwi vs3, vs3, vs3, 1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: vperm v3, v4, v3, v2 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: vmrghb v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xxsldwi vs3, vs2, vs2, 3 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: vperm v4, v4, v5, v2 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: vmrghb v3, v3, v4 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-NEXT: vmrghh v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs2 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xscvspdpn f3, vs2 ; CHECK-BE-NEXT: xxsldwi vs2, vs2, vs2, 1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: vperm v4, v5, v4, v2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vmrghb v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f3 -; 
CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v0, r3 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: mtvsrd v5, r3 +; CHECK-BE-NEXT: vperm v5, v5, v0, v2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vmrghb v4, v4, v5 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 +; CHECK-BE-NEXT: vmrghh v4, v5, v4 +; CHECK-BE-NEXT: vmrghw v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs1 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xscvspdpn f2, vs1 ; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: vperm v4, v5, v4, v2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghb v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v0, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v5, r3 +; CHECK-BE-NEXT: vperm v5, v5, v0, v2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghb v4, v4, v5 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 +; CHECK-BE-NEXT: vmrghh v4, v5, v4 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz 
r3, f1 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mtvsrwz v0, r3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: mtvsrd v5, r3 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: vperm v5, v0, v5, v2 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vmrghb v4, v5, v4 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v5, r3 +; CHECK-BE-NEXT: mtvsrwz v0, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v0, r3 -; CHECK-BE-NEXT: vmrghb v5, v5, v0 -; CHECK-BE-NEXT: vmrghh v4, v5, v4 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: xxmrghd v2, v3, v2 +; CHECK-BE-NEXT: mtvsrwz v1, r3 +; CHECK-BE-NEXT: vperm v2, v0, v1, v2 +; CHECK-BE-NEXT: vmrghh v2, v2, v5 +; CHECK-BE-NEXT: vmrghw v2, v2, v4 +; CHECK-BE-NEXT: xxmrghd v2, v2, v3 ; CHECK-BE-NEXT: blr entry: %a = load <16 x float>, <16 x float>* %0, align 64 @@ -712,19 +694,20 @@ ; CHECK-BE-LABEL: test2elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mtfprd f0, r3 +; CHECK-BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: addi r3, r3, .LCPI4_0@toc@l +; CHECK-BE-NEXT: lxvx v2, 0, r3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v2, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: addi r3, r1, -2 -; CHECK-BE-NEXT: vmrghb v2, v2, v3 +; CHECK-BE-NEXT: vperm v2, v3, v4, v2 ; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10 ; CHECK-BE-NEXT: stxsihx v2, 0, r3 ; CHECK-BE-NEXT: lhz r3, -2(r1) @@ -796,32 +779,31 @@ ; 
CHECK-BE-LABEL: test4elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-BE-NEXT: addis r3, r2, .LCPI5_0@toc@ha +; CHECK-BE-NEXT: addi r3, r3, .LCPI5_0@toc@l ; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: lxvx v3, 0, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxswapd vs0, v2 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xscvspdpn f0, v2 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: vmrghb v3, v4, v3 +; CHECK-BE-NEXT: vperm v4, v5, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 1 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v2, r3 +; CHECK-BE-NEXT: mtvsrwz v2, r3 ; CHECK-BE-NEXT: li r3, 0 -; CHECK-BE-NEXT: vmrghb v2, v4, v2 -; CHECK-BE-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-NEXT: vperm v2, v5, v2, v3 +; CHECK-BE-NEXT: vmrghh v2, v2, v4 ; CHECK-BE-NEXT: vextuwlx r3, r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -941,59 +923,54 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: addis r3, r2, .LCPI6_0@toc@ha +; CHECK-BE-NEXT: addi r3, r3, .LCPI6_0@toc@l +; CHECK-BE-NEXT: lxvx v2, 0, r3 ; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs1 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: mtvsrd v2, r3 ; CHECK-BE-NEXT: 
xscvdpsxws f2, f2 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xscvspdpn f2, vs1 ; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: vperm v3, v4, v3, v2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghb v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: vperm v4, v4, v5, v2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghb v3, v3, v4 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-NEXT: vmrghh v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: vperm v4, v5, v4, v2 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vmrghb v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v5, r3 -; CHECK-BE-NEXT: vmrghb v4, v4, v5 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 +; CHECK-BE-NEXT: mtvsrwz v0, r3 +; 
CHECK-BE-NEXT: vperm v2, v5, v0, v2 +; CHECK-BE-NEXT: vmrghh v2, v2, v4 +; CHECK-BE-NEXT: vmrghw v2, v2, v3 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -1211,113 +1188,100 @@ ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: addis r3, r2, .LCPI7_0@toc@ha +; CHECK-BE-NEXT: addi r3, r3, .LCPI7_0@toc@l +; CHECK-BE-NEXT: lxvx v2, 0, r3 ; CHECK-BE-NEXT: xxsldwi vs4, vs3, vs3, 3 ; CHECK-BE-NEXT: xscvspdpn f4, vs4 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 ; CHECK-BE-NEXT: mffprwz r3, f4 ; CHECK-BE-NEXT: xxswapd vs4, vs3 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: xscvspdpn f4, vs4 -; CHECK-BE-NEXT: mtvsrd v2, r3 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 ; CHECK-BE-NEXT: mffprwz r3, f4 ; CHECK-BE-NEXT: xscvspdpn f4, vs3 ; CHECK-BE-NEXT: xxsldwi vs3, vs3, vs3, 1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: vperm v3, v4, v3, v2 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: vmrghb v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xxsldwi vs3, vs2, vs2, 3 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: vperm v4, v4, v5, v2 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: vmrghb v3, v3, v4 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-NEXT: vmrghh v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs2 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: 
xscvspdpn f3, vs2 ; CHECK-BE-NEXT: xxsldwi vs2, vs2, vs2, 1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: vperm v4, v5, v4, v2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vmrghb v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v0, r3 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: mtvsrd v5, r3 +; CHECK-BE-NEXT: vperm v5, v5, v0, v2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vmrghb v4, v4, v5 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 +; CHECK-BE-NEXT: vmrghh v4, v5, v4 +; CHECK-BE-NEXT: vmrghw v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs1 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xscvspdpn f2, vs1 ; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: vperm v4, v5, v4, v2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghb v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v0, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v5, r3 +; CHECK-BE-NEXT: vperm v5, v5, 
v0, v2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghb v4, v4, v5 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 +; CHECK-BE-NEXT: vmrghh v4, v5, v4 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mtvsrwz v0, r3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: mtvsrd v5, r3 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: vperm v5, v0, v5, v2 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vmrghb v4, v5, v4 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v5, r3 +; CHECK-BE-NEXT: mtvsrwz v0, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v0, r3 -; CHECK-BE-NEXT: vmrghb v5, v5, v0 -; CHECK-BE-NEXT: vmrghh v4, v5, v4 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: xxmrghd v2, v3, v2 +; CHECK-BE-NEXT: mtvsrwz v1, r3 +; CHECK-BE-NEXT: vperm v2, v0, v1, v2 +; CHECK-BE-NEXT: vmrghh v2, v2, v5 +; CHECK-BE-NEXT: vmrghw v2, v2, v4 +; CHECK-BE-NEXT: xxmrghd v2, v2, v3 ; CHECK-BE-NEXT: blr entry: %a = load <16 x float>, <16 x float>* %0, align 64 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll @@ -41,16 +41,17 @@ ; CHECK-BE-LABEL: test2elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xscvdpsxws f0, v2 +; CHECK-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha +; CHECK-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l +; CHECK-BE-NEXT: lxvx v3, 0, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxswapd vs0, v2 -; 
CHECK-BE-NEXT: sldi r3, r3, 48 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v2, r3 +; CHECK-BE-NEXT: mtvsrwz v2, r3 ; CHECK-BE-NEXT: li r3, 0 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-NEXT: vperm v2, v4, v2, v3 ; CHECK-BE-NEXT: vextuwlx r3, r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -114,27 +115,26 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; CHECK-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l +; CHECK-BE-NEXT: lxvx v2, 0, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v2, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: sldi r3, r3, 48 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-NEXT: vperm v3, v3, v4, v2 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: vmrghh v3, v3, v4 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 +; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: vperm v2, v4, v5, v2 +; CHECK-BE-NEXT: vmrghw v2, v2, v3 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -237,52 +237,47 @@ ; CHECK-BE-LABEL: test8elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; CHECK-BE-NEXT: 
addi r3, r3, .LCPI2_0@toc@l +; CHECK-BE-NEXT: lxvx v2, 0, r3 ; CHECK-BE-NEXT: xscvdpsxws f4, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v2, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: sldi r3, r3, 48 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-NEXT: vperm v3, v3, v4, v2 ; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: sldi r3, r3, 48 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: vmrghh v3, v3, v4 +; CHECK-BE-NEXT: vperm v4, v4, v5, v2 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: vmrghw v3, v4, v3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: sldi r3, r3, 48 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: vmrghh v3, v3, v4 +; CHECK-BE-NEXT: vperm v4, v4, v5, v2 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v5, r3 -; CHECK-BE-NEXT: vmrghh v4, v4, v5 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: xxmrghd v2, v3, v2 +; CHECK-BE-NEXT: mtvsrwz v0, r3 +; CHECK-BE-NEXT: vperm v2, v5, v0, v2 +; CHECK-BE-NEXT: vmrghw v2, 
v2, v4 +; CHECK-BE-NEXT: xxmrghd v2, v2, v3 ; CHECK-BE-NEXT: blr entry: %a = load <8 x double>, <8 x double>* %0, align 64 @@ -468,101 +463,88 @@ ; ; CHECK-BE-LABEL: test16elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs4, 48(r4) -; CHECK-BE-NEXT: lxv vs3, 32(r4) -; CHECK-BE-NEXT: lxv vs2, 16(r4) -; CHECK-BE-NEXT: lxv vs1, 0(r4) -; CHECK-BE-NEXT: xscvdpsxws f5, f4 -; CHECK-BE-NEXT: xxswapd vs4, vs4 -; CHECK-BE-NEXT: xscvdpsxws f6, f3 +; CHECK-BE-NEXT: lxv vs3, 48(r4) +; CHECK-BE-NEXT: lxv vs2, 32(r4) +; CHECK-BE-NEXT: lxv vs1, 16(r4) +; CHECK-BE-NEXT: lxv vs0, 0(r4) +; CHECK-BE-NEXT: addis r5, r2, .LCPI3_0@toc@ha +; CHECK-BE-NEXT: addi r5, r5, .LCPI3_0@toc@l +; CHECK-BE-NEXT: lxvx v2, 0, r5 +; CHECK-BE-NEXT: xscvdpsxws f4, f3 +; CHECK-BE-NEXT: xscvdpsxws f5, f2 +; CHECK-BE-NEXT: xscvdpsxws f6, f1 ; CHECK-BE-NEXT: xxswapd vs3, vs3 -; CHECK-BE-NEXT: xscvdpsxws f7, f2 -; CHECK-BE-NEXT: lxv vs0, 112(r4) +; CHECK-BE-NEXT: xscvdpsxws f7, f0 ; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: xxswapd vs1, vs1 +; CHECK-BE-NEXT: xxswapd vs0, vs0 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r5, f5 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: mtvsrd v2, r5 -; CHECK-BE-NEXT: mffprwz r5, f4 -; CHECK-BE-NEXT: xscvdpsxws f4, f1 -; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mtvsrd v3, r5 -; CHECK-BE-NEXT: mffprwz r5, f6 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: vmrghh v2, v2, v3 -; CHECK-BE-NEXT: mtvsrd v3, r5 -; CHECK-BE-NEXT: mffprwz r5, f3 -; CHECK-BE-NEXT: xscvdpsxws f3, f0 -; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v4, r5 -; CHECK-BE-NEXT: mffprwz r5, f7 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: vmrghh v3, v3, v4 -; CHECK-BE-NEXT: mtvsrd v4, r5 ; CHECK-BE-NEXT: mffprwz r5, f4 -; CHECK-BE-NEXT: 
vmrghw v2, v3, v2 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: mtvsrd v5, r5 +; CHECK-BE-NEXT: mtvsrwz v3, r5 +; CHECK-BE-NEXT: mffprwz r5, f5 +; CHECK-BE-NEXT: mtvsrwz v4, r5 +; CHECK-BE-NEXT: mffprwz r5, f6 +; CHECK-BE-NEXT: mtvsrwz v5, r5 +; CHECK-BE-NEXT: mffprwz r5, f7 +; CHECK-BE-NEXT: mtvsrwz v0, r5 ; CHECK-BE-NEXT: mffprwz r5, f3 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: mtvsrd v0, r5 +; CHECK-BE-NEXT: lxv vs3, 112(r4) +; CHECK-BE-NEXT: mtvsrwz v1, r5 ; CHECK-BE-NEXT: mffprwz r5, f2 ; CHECK-BE-NEXT: lxv vs2, 96(r4) -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: mtvsrd v1, r5 +; CHECK-BE-NEXT: vperm v3, v3, v1, v2 +; CHECK-BE-NEXT: mtvsrwz v1, r5 ; CHECK-BE-NEXT: mffprwz r5, f1 ; CHECK-BE-NEXT: lxv vs1, 80(r4) +; CHECK-BE-NEXT: xscvdpsxws f4, f3 +; CHECK-BE-NEXT: xxswapd vs3, vs3 +; CHECK-BE-NEXT: vperm v4, v4, v1, v2 +; CHECK-BE-NEXT: mtvsrwz v1, r5 +; CHECK-BE-NEXT: mffprwz r5, f0 +; CHECK-BE-NEXT: lxv vs0, 64(r4) +; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: vperm v5, v5, v1, v2 +; CHECK-BE-NEXT: mtvsrwz v1, r5 +; CHECK-BE-NEXT: vmrghw v3, v4, v3 +; CHECK-BE-NEXT: vperm v0, v0, v1, v2 +; CHECK-BE-NEXT: mffprwz r4, f4 +; CHECK-BE-NEXT: vmrghw v5, v0, v5 +; CHECK-BE-NEXT: mtvsrwz v4, r4 +; CHECK-BE-NEXT: mffprwz r4, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: vmrghh v4, v4, v1 -; CHECK-BE-NEXT: mtvsrd v1, r5 +; CHECK-BE-NEXT: xxmrghd vs4, v5, v3 +; CHECK-BE-NEXT: mtvsrwz v3, r4 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r5, f0 -; CHECK-BE-NEXT: lxv vs0, 64(r4) -; CHECK-BE-NEXT: vmrghh v5, v5, v1 -; CHECK-BE-NEXT: sldi r5, r5, 48 +; CHECK-BE-NEXT: vperm v3, v4, v3, v2 +; CHECK-BE-NEXT: stxv vs4, 0(r3) ; CHECK-BE-NEXT: mffprwz r4, f3 -; CHECK-BE-NEXT: mtvsrd v1, r5 -; CHECK-BE-NEXT: vmrghw v3, v5, v4 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: vmrghh v0, v0, v1 -; CHECK-BE-NEXT: xxmrghd vs3, v3, v2 -; CHECK-BE-NEXT: mtvsrd 
v2, r4 +; CHECK-BE-NEXT: mtvsrwz v4, r4 ; CHECK-BE-NEXT: mffprwz r4, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: sldi r4, r4, 48 +; CHECK-BE-NEXT: mtvsrwz v5, r4 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: stxv vs3, 0(r3) -; CHECK-BE-NEXT: mtvsrd v3, r4 -; CHECK-BE-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-NEXT: vperm v4, v4, v5, v2 ; CHECK-BE-NEXT: mffprwz r4, f2 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: vmrghw v2, v2, v0 -; CHECK-BE-NEXT: mtvsrd v3, r4 +; CHECK-BE-NEXT: vmrghw v3, v4, v3 +; CHECK-BE-NEXT: mtvsrwz v4, r4 ; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: sldi r4, r4, 48 +; CHECK-BE-NEXT: mtvsrwz v5, r4 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v4, r4 -; CHECK-BE-NEXT: vmrghh v3, v3, v4 +; CHECK-BE-NEXT: vperm v4, v4, v5, v2 ; CHECK-BE-NEXT: mffprwz r4, f1 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: mtvsrd v4, r4 +; CHECK-BE-NEXT: mtvsrwz v5, r4 ; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: mtvsrd v5, r4 -; CHECK-BE-NEXT: vmrghh v4, v4, v5 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: xxmrghd vs0, v3, v2 +; CHECK-BE-NEXT: mtvsrwz v0, r4 +; CHECK-BE-NEXT: vperm v2, v5, v0, v2 +; CHECK-BE-NEXT: vmrghw v2, v2, v4 +; CHECK-BE-NEXT: xxmrghd vs0, v2, v3 ; CHECK-BE-NEXT: stxv vs0, 16(r3) ; CHECK-BE-NEXT: blr entry: @@ -604,16 +586,17 @@ ; CHECK-BE-LABEL: test2elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xscvdpsxws f0, v2 +; CHECK-BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha +; CHECK-BE-NEXT: addi r3, r3, .LCPI4_0@toc@l +; CHECK-BE-NEXT: lxvx v3, 0, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxswapd vs0, v2 -; CHECK-BE-NEXT: sldi r3, r3, 48 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v2, r3 +; 
CHECK-BE-NEXT: mtvsrwz v2, r3 ; CHECK-BE-NEXT: li r3, 0 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-NEXT: vperm v2, v4, v2, v3 ; CHECK-BE-NEXT: vextuwlx r3, r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -677,27 +660,26 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: addis r3, r2, .LCPI5_0@toc@ha +; CHECK-BE-NEXT: addi r3, r3, .LCPI5_0@toc@l +; CHECK-BE-NEXT: lxvx v2, 0, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v2, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: sldi r3, r3, 48 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-NEXT: vperm v3, v3, v4, v2 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: vmrghh v3, v3, v4 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 +; CHECK-BE-NEXT: mtvsrwz v5, r3 +; CHECK-BE-NEXT: vperm v2, v4, v5, v2 +; CHECK-BE-NEXT: vmrghw v2, v2, v3 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -800,52 +782,47 @@ ; CHECK-BE-LABEL: test8elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: addis r3, r2, .LCPI6_0@toc@ha +; CHECK-BE-NEXT: addi r3, r3, .LCPI6_0@toc@l +; CHECK-BE-NEXT: lxvx v2, 0, r3 ; CHECK-BE-NEXT: xscvdpsxws f4, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: sldi r3, r3, 
48 -; CHECK-BE-NEXT: mtvsrd v2, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: sldi r3, r3, 48 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-NEXT: vperm v3, v3, v4, v2 ; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: sldi r3, r3, 48 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: vmrghh v3, v3, v4 +; CHECK-BE-NEXT: vperm v4, v4, v5, v2 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: vmrghw v3, v4, v3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: sldi r3, r3, 48 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: vmrghh v3, v3, v4 +; CHECK-BE-NEXT: vperm v4, v4, v5, v2 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v5, r3 -; CHECK-BE-NEXT: vmrghh v4, v4, v5 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: xxmrghd v2, v3, v2 +; CHECK-BE-NEXT: mtvsrwz v0, r3 +; CHECK-BE-NEXT: vperm v2, v5, v0, v2 +; CHECK-BE-NEXT: vmrghw v2, v2, v4 +; CHECK-BE-NEXT: xxmrghd v2, v2, v3 ; CHECK-BE-NEXT: blr entry: %a = load <8 x double>, <8 x double>* %0, align 64 @@ -1031,101 +1008,88 @@ ; ; CHECK-BE-LABEL: test16elt_signed: ; CHECK-BE: # %bb.0: # %entry -; 
CHECK-BE-NEXT: lxv vs4, 48(r4) -; CHECK-BE-NEXT: lxv vs3, 32(r4) -; CHECK-BE-NEXT: lxv vs2, 16(r4) -; CHECK-BE-NEXT: lxv vs1, 0(r4) -; CHECK-BE-NEXT: xscvdpsxws f5, f4 -; CHECK-BE-NEXT: xxswapd vs4, vs4 -; CHECK-BE-NEXT: xscvdpsxws f6, f3 +; CHECK-BE-NEXT: lxv vs3, 48(r4) +; CHECK-BE-NEXT: lxv vs2, 32(r4) +; CHECK-BE-NEXT: lxv vs1, 16(r4) +; CHECK-BE-NEXT: lxv vs0, 0(r4) +; CHECK-BE-NEXT: addis r5, r2, .LCPI7_0@toc@ha +; CHECK-BE-NEXT: addi r5, r5, .LCPI7_0@toc@l +; CHECK-BE-NEXT: lxvx v2, 0, r5 +; CHECK-BE-NEXT: xscvdpsxws f4, f3 +; CHECK-BE-NEXT: xscvdpsxws f5, f2 +; CHECK-BE-NEXT: xscvdpsxws f6, f1 ; CHECK-BE-NEXT: xxswapd vs3, vs3 -; CHECK-BE-NEXT: xscvdpsxws f7, f2 -; CHECK-BE-NEXT: lxv vs0, 112(r4) +; CHECK-BE-NEXT: xscvdpsxws f7, f0 ; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: xxswapd vs1, vs1 +; CHECK-BE-NEXT: xxswapd vs0, vs0 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r5, f5 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: mtvsrd v2, r5 -; CHECK-BE-NEXT: mffprwz r5, f4 -; CHECK-BE-NEXT: xscvdpsxws f4, f1 -; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mtvsrd v3, r5 -; CHECK-BE-NEXT: mffprwz r5, f6 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: vmrghh v2, v2, v3 -; CHECK-BE-NEXT: mtvsrd v3, r5 -; CHECK-BE-NEXT: mffprwz r5, f3 -; CHECK-BE-NEXT: xscvdpsxws f3, f0 -; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v4, r5 -; CHECK-BE-NEXT: mffprwz r5, f7 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: vmrghh v3, v3, v4 -; CHECK-BE-NEXT: mtvsrd v4, r5 ; CHECK-BE-NEXT: mffprwz r5, f4 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: mtvsrd v5, r5 +; CHECK-BE-NEXT: mtvsrwz v3, r5 +; CHECK-BE-NEXT: mffprwz r5, f5 +; CHECK-BE-NEXT: mtvsrwz v4, r5 +; CHECK-BE-NEXT: mffprwz 
r5, f6 +; CHECK-BE-NEXT: mtvsrwz v5, r5 +; CHECK-BE-NEXT: mffprwz r5, f7 +; CHECK-BE-NEXT: mtvsrwz v0, r5 ; CHECK-BE-NEXT: mffprwz r5, f3 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: mtvsrd v0, r5 +; CHECK-BE-NEXT: lxv vs3, 112(r4) +; CHECK-BE-NEXT: mtvsrwz v1, r5 ; CHECK-BE-NEXT: mffprwz r5, f2 ; CHECK-BE-NEXT: lxv vs2, 96(r4) -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: mtvsrd v1, r5 +; CHECK-BE-NEXT: vperm v3, v3, v1, v2 +; CHECK-BE-NEXT: mtvsrwz v1, r5 ; CHECK-BE-NEXT: mffprwz r5, f1 ; CHECK-BE-NEXT: lxv vs1, 80(r4) +; CHECK-BE-NEXT: xscvdpsxws f4, f3 +; CHECK-BE-NEXT: xxswapd vs3, vs3 +; CHECK-BE-NEXT: vperm v4, v4, v1, v2 +; CHECK-BE-NEXT: mtvsrwz v1, r5 +; CHECK-BE-NEXT: mffprwz r5, f0 +; CHECK-BE-NEXT: lxv vs0, 64(r4) +; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: vperm v5, v5, v1, v2 +; CHECK-BE-NEXT: mtvsrwz v1, r5 +; CHECK-BE-NEXT: vmrghw v3, v4, v3 +; CHECK-BE-NEXT: vperm v0, v0, v1, v2 +; CHECK-BE-NEXT: mffprwz r4, f4 +; CHECK-BE-NEXT: vmrghw v5, v0, v5 +; CHECK-BE-NEXT: mtvsrwz v4, r4 +; CHECK-BE-NEXT: mffprwz r4, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: vmrghh v4, v4, v1 -; CHECK-BE-NEXT: mtvsrd v1, r5 +; CHECK-BE-NEXT: xxmrghd vs4, v5, v3 +; CHECK-BE-NEXT: mtvsrwz v3, r4 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r5, f0 -; CHECK-BE-NEXT: lxv vs0, 64(r4) -; CHECK-BE-NEXT: vmrghh v5, v5, v1 -; CHECK-BE-NEXT: sldi r5, r5, 48 +; CHECK-BE-NEXT: vperm v3, v4, v3, v2 +; CHECK-BE-NEXT: stxv vs4, 0(r3) ; CHECK-BE-NEXT: mffprwz r4, f3 -; CHECK-BE-NEXT: mtvsrd v1, r5 -; CHECK-BE-NEXT: vmrghw v3, v5, v4 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: vmrghh v0, v0, v1 -; CHECK-BE-NEXT: xxmrghd vs3, v3, v2 -; CHECK-BE-NEXT: mtvsrd v2, r4 +; CHECK-BE-NEXT: mtvsrwz v4, r4 ; CHECK-BE-NEXT: mffprwz r4, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: sldi r4, r4, 48 +; CHECK-BE-NEXT: mtvsrwz v5, r4 ; 
CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: stxv vs3, 0(r3) -; CHECK-BE-NEXT: mtvsrd v3, r4 -; CHECK-BE-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-NEXT: vperm v4, v4, v5, v2 ; CHECK-BE-NEXT: mffprwz r4, f2 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: vmrghw v2, v2, v0 -; CHECK-BE-NEXT: mtvsrd v3, r4 +; CHECK-BE-NEXT: vmrghw v3, v4, v3 +; CHECK-BE-NEXT: mtvsrwz v4, r4 ; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: sldi r4, r4, 48 +; CHECK-BE-NEXT: mtvsrwz v5, r4 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v4, r4 -; CHECK-BE-NEXT: vmrghh v3, v3, v4 +; CHECK-BE-NEXT: vperm v4, v4, v5, v2 ; CHECK-BE-NEXT: mffprwz r4, f1 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: mtvsrd v4, r4 +; CHECK-BE-NEXT: mtvsrwz v5, r4 ; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: mtvsrd v5, r4 -; CHECK-BE-NEXT: vmrghh v4, v4, v5 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: xxmrghd vs0, v3, v2 +; CHECK-BE-NEXT: mtvsrwz v0, r4 +; CHECK-BE-NEXT: vperm v2, v5, v0, v2 +; CHECK-BE-NEXT: vmrghw v2, v2, v4 +; CHECK-BE-NEXT: xxmrghd vs0, v2, v3 ; CHECK-BE-NEXT: stxv vs0, 16(r3) ; CHECK-BE-NEXT: blr entry: diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll @@ -46,7 +46,7 @@ ; CHECK-BE-NEXT: xscvdpuxws f0, f0 ; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: mtvsrws v2, r3 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 +; CHECK-BE-NEXT: vmrgow v2, v3, v2 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -338,7 +338,7 @@ ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: mtvsrws v2, r3 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 +; CHECK-BE-NEXT: vmrgow v2, v3, v2 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: diff --git 
a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll @@ -46,16 +46,17 @@ ; CHECK-BE-LABEL: test2elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xscvdpsxws f0, v2 +; CHECK-BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha +; CHECK-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l +; CHECK-BE-NEXT: lxvx v3, 0, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxswapd vs0, v2 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v2, r3 +; CHECK-BE-NEXT: mtvsrwz v2, r3 ; CHECK-BE-NEXT: addi r3, r1, -2 -; CHECK-BE-NEXT: vmrghb v2, v3, v2 +; CHECK-BE-NEXT: vperm v2, v4, v2, v3 ; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10 ; CHECK-BE-NEXT: stxsihx v2, 0, r3 ; CHECK-BE-NEXT: lhz r3, -2(r1) @@ -122,28 +123,27 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; CHECK-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l +; CHECK-BE-NEXT: lxvx v2, 0, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v2, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: vmrghb v2, v2, v3 +; CHECK-BE-NEXT: vperm v3, v3, v4, v2 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 
56 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: li r3, 0 -; CHECK-BE-NEXT: vmrghb v3, v3, v4 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-NEXT: vperm v2, v4, v5, v2 +; CHECK-BE-NEXT: vmrghh v2, v2, v3 ; CHECK-BE-NEXT: vextuwlx r3, r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -249,52 +249,47 @@ ; CHECK-BE-LABEL: test8elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; CHECK-BE-NEXT: addi r3, r3, .LCPI2_0@toc@l +; CHECK-BE-NEXT: lxvx v2, 0, r3 ; CHECK-BE-NEXT: xscvdpsxws f4, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v2, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: vmrghb v2, v2, v3 +; CHECK-BE-NEXT: vperm v3, v3, v4, v2 ; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: vmrghb v3, v3, v4 +; CHECK-BE-NEXT: vperm v4, v4, v5, v2 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: vmrghh v3, v4, v3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: sldi r3, r3, 
56 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: vmrghb v3, v3, v4 +; CHECK-BE-NEXT: vperm v4, v4, v5, v2 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v5, r3 -; CHECK-BE-NEXT: vmrghb v4, v4, v5 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 +; CHECK-BE-NEXT: mtvsrwz v0, r3 +; CHECK-BE-NEXT: vperm v2, v5, v0, v2 +; CHECK-BE-NEXT: vmrghh v2, v2, v4 +; CHECK-BE-NEXT: vmrghw v2, v2, v3 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -481,100 +476,87 @@ ; CHECK-BE-LABEL: test16elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs7, 112(r3) -; CHECK-BE-NEXT: lxv vs6, 96(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) ; CHECK-BE-NEXT: xscvdpsxws f8, f7 ; CHECK-BE-NEXT: xxswapd vs7, vs7 -; CHECK-BE-NEXT: lxv vs2, 32(r3) ; CHECK-BE-NEXT: lxv vs3, 48(r3) ; CHECK-BE-NEXT: lxv vs4, 64(r3) ; CHECK-BE-NEXT: lxv vs5, 80(r3) +; CHECK-BE-NEXT: lxv vs6, 96(r3) +; CHECK-BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha +; CHECK-BE-NEXT: addi r3, r3, .LCPI3_0@toc@l +; CHECK-BE-NEXT: lxvx v2, 0, r3 ; CHECK-BE-NEXT: xscvdpsxws f7, f7 ; CHECK-BE-NEXT: mffprwz r3, f8 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v2, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: mffprwz r3, f7 ; CHECK-BE-NEXT: xscvdpsxws f7, f6 ; CHECK-BE-NEXT: xxswapd vs6, vs6 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f6, f6 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: vmrghb v2, v2, v3 +; CHECK-BE-NEXT: vperm v3, v3, v4, v2 ; CHECK-BE-NEXT: mffprwz r3, f7 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: mffprwz r3, f6 ; CHECK-BE-NEXT: xscvdpsxws 
f6, f5 ; CHECK-BE-NEXT: xxswapd vs5, vs5 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvdpsxws f5, f5 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: vmrghb v3, v3, v4 +; CHECK-BE-NEXT: vperm v4, v4, v5, v2 ; CHECK-BE-NEXT: mffprwz r3, f6 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: vmrghh v3, v4, v3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: mffprwz r3, f5 ; CHECK-BE-NEXT: xscvdpsxws f5, f4 ; CHECK-BE-NEXT: xxswapd vs4, vs4 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: vmrghb v3, v3, v4 +; CHECK-BE-NEXT: vperm v4, v4, v5, v2 ; CHECK-BE-NEXT: mffprwz r3, f5 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: mffprwz r3, f4 ; CHECK-BE-NEXT: xscvdpsxws f4, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v0, r3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mtvsrd v5, r3 -; CHECK-BE-NEXT: vmrghb v4, v4, v5 +; CHECK-BE-NEXT: vperm v5, v5, v0, v2 +; CHECK-BE-NEXT: vmrghh v4, v5, v4 ; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: vmrghw v3, v4, v3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: vmrghb v3, v3, v4 +; CHECK-BE-NEXT: vperm v4, v4, v5, v2 ; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; 
CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v0, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mtvsrd v5, r3 -; CHECK-BE-NEXT: vmrghb v4, v4, v5 +; CHECK-BE-NEXT: vperm v5, v5, v0, v2 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: vmrghh v4, v5, v4 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v0, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v5, r3 -; CHECK-BE-NEXT: vmrghb v4, v4, v5 +; CHECK-BE-NEXT: vperm v5, v5, v0, v2 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v5, r3 +; CHECK-BE-NEXT: mtvsrwz v0, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v0, r3 -; CHECK-BE-NEXT: vmrghb v5, v5, v0 -; CHECK-BE-NEXT: vmrghh v4, v5, v4 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: xxmrghd v2, v3, v2 +; CHECK-BE-NEXT: mtvsrwz v1, r3 +; CHECK-BE-NEXT: vperm v2, v0, v1, v2 +; CHECK-BE-NEXT: vmrghh v2, v2, v5 +; CHECK-BE-NEXT: vmrghw v2, v2, v4 +; CHECK-BE-NEXT: xxmrghd v2, v2, v3 ; CHECK-BE-NEXT: blr entry: %a = load <16 x double>, <16 x double>* %0, align 128 @@ -619,16 +601,17 @@ ; CHECK-BE-LABEL: test2elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xscvdpsxws f0, v2 +; CHECK-BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha +; CHECK-BE-NEXT: addi r3, r3, .LCPI4_0@toc@l +; CHECK-BE-NEXT: lxvx v3, 0, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxswapd vs0, v2 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v2, r3 +; CHECK-BE-NEXT: mtvsrwz v2, r3 ; CHECK-BE-NEXT: addi r3, r1, 
-2 -; CHECK-BE-NEXT: vmrghb v2, v3, v2 +; CHECK-BE-NEXT: vperm v2, v4, v2, v3 ; CHECK-BE-NEXT: vsldoi v2, v2, v2, 10 ; CHECK-BE-NEXT: stxsihx v2, 0, r3 ; CHECK-BE-NEXT: lhz r3, -2(r1) @@ -695,28 +678,27 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: addis r3, r2, .LCPI5_0@toc@ha +; CHECK-BE-NEXT: addi r3, r3, .LCPI5_0@toc@l +; CHECK-BE-NEXT: lxvx v2, 0, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v2, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: vmrghb v2, v2, v3 +; CHECK-BE-NEXT: vperm v3, v3, v4, v2 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: li r3, 0 -; CHECK-BE-NEXT: vmrghb v3, v3, v4 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-NEXT: vperm v2, v4, v5, v2 +; CHECK-BE-NEXT: vmrghh v2, v2, v3 ; CHECK-BE-NEXT: vextuwlx r3, r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -822,52 +804,47 @@ ; CHECK-BE-LABEL: test8elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs3, 48(r3) -; CHECK-BE-NEXT: lxv vs2, 32(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) +; CHECK-BE-NEXT: addis r3, r2, .LCPI6_0@toc@ha +; CHECK-BE-NEXT: addi r3, r3, .LCPI6_0@toc@l +; CHECK-BE-NEXT: lxvx v2, 0, r3 ; CHECK-BE-NEXT: xscvdpsxws f4, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: mffprwz r3, f4 -; 
CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v2, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: vmrghb v2, v2, v3 +; CHECK-BE-NEXT: vperm v3, v3, v4, v2 ; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: vmrghb v3, v3, v4 +; CHECK-BE-NEXT: vperm v4, v4, v5, v2 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: vmrghh v3, v4, v3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: vmrghb v3, v3, v4 +; CHECK-BE-NEXT: vperm v4, v4, v5, v2 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v5, r3 -; CHECK-BE-NEXT: vmrghb v4, v4, v5 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 +; CHECK-BE-NEXT: mtvsrwz v0, r3 +; CHECK-BE-NEXT: vperm v2, v5, v0, v2 +; CHECK-BE-NEXT: vmrghh v2, v2, v4 +; CHECK-BE-NEXT: vmrghw v2, v2, v3 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -1054,100 +1031,87 @@ ; CHECK-BE-LABEL: test16elt_signed: ; CHECK-BE: # %bb.0: # %entry ; 
CHECK-BE-NEXT: lxv vs7, 112(r3) -; CHECK-BE-NEXT: lxv vs6, 96(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs2, 32(r3) ; CHECK-BE-NEXT: xscvdpsxws f8, f7 ; CHECK-BE-NEXT: xxswapd vs7, vs7 -; CHECK-BE-NEXT: lxv vs2, 32(r3) ; CHECK-BE-NEXT: lxv vs3, 48(r3) ; CHECK-BE-NEXT: lxv vs4, 64(r3) ; CHECK-BE-NEXT: lxv vs5, 80(r3) +; CHECK-BE-NEXT: lxv vs6, 96(r3) +; CHECK-BE-NEXT: addis r3, r2, .LCPI7_0@toc@ha +; CHECK-BE-NEXT: addi r3, r3, .LCPI7_0@toc@l +; CHECK-BE-NEXT: lxvx v2, 0, r3 ; CHECK-BE-NEXT: xscvdpsxws f7, f7 ; CHECK-BE-NEXT: mffprwz r3, f8 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v2, r3 +; CHECK-BE-NEXT: mtvsrwz v3, r3 ; CHECK-BE-NEXT: mffprwz r3, f7 ; CHECK-BE-NEXT: xscvdpsxws f7, f6 ; CHECK-BE-NEXT: xxswapd vs6, vs6 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f6, f6 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: vmrghb v2, v2, v3 +; CHECK-BE-NEXT: vperm v3, v3, v4, v2 ; CHECK-BE-NEXT: mffprwz r3, f7 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: mffprwz r3, f6 ; CHECK-BE-NEXT: xscvdpsxws f6, f5 ; CHECK-BE-NEXT: xxswapd vs5, vs5 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvdpsxws f5, f5 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: vmrghb v3, v3, v4 +; CHECK-BE-NEXT: vperm v4, v4, v5, v2 ; CHECK-BE-NEXT: mffprwz r3, f6 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: vmrghh v3, v4, v3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: mffprwz r3, f5 ; CHECK-BE-NEXT: xscvdpsxws f5, f4 ; CHECK-BE-NEXT: xxswapd vs4, vs4 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: vmrghb v3, v3, v4 +; CHECK-BE-NEXT: vperm v4, v4, v5, v2 ; CHECK-BE-NEXT: mffprwz r3, f5 -; 
CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: mffprwz r3, f4 ; CHECK-BE-NEXT: xscvdpsxws f4, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v0, r3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mtvsrd v5, r3 -; CHECK-BE-NEXT: vmrghb v4, v4, v5 +; CHECK-BE-NEXT: vperm v5, v5, v0, v2 +; CHECK-BE-NEXT: vmrghh v4, v5, v4 ; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: vmrghw v3, v4, v3 +; CHECK-BE-NEXT: mtvsrwz v4, r3 ; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: vmrghb v3, v3, v4 +; CHECK-BE-NEXT: vperm v4, v4, v5, v2 ; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v0, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mtvsrd v5, r3 -; CHECK-BE-NEXT: vmrghb v4, v4, v5 +; CHECK-BE-NEXT: vperm v5, v5, v0, v2 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: vmrghh v4, v5, v4 +; CHECK-BE-NEXT: mtvsrwz v5, r3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrwz v0, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v5, r3 -; CHECK-BE-NEXT: vmrghb v4, v4, v5 +; CHECK-BE-NEXT: vperm v5, v5, v0, v2 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: sldi 
r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v5, r3 +; CHECK-BE-NEXT: mtvsrwz v0, r3 ; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v0, r3 -; CHECK-BE-NEXT: vmrghb v5, v5, v0 -; CHECK-BE-NEXT: vmrghh v4, v5, v4 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: xxmrghd v2, v3, v2 +; CHECK-BE-NEXT: mtvsrwz v1, r3 +; CHECK-BE-NEXT: vperm v2, v0, v1, v2 +; CHECK-BE-NEXT: vmrghh v2, v2, v5 +; CHECK-BE-NEXT: vmrghw v2, v2, v4 +; CHECK-BE-NEXT: xxmrghd v2, v2, v3 ; CHECK-BE-NEXT: blr entry: %a = load <16 x double>, <16 x double>* %0, align 128 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll @@ -67,7 +67,7 @@ ; CHECK-BE-NEXT: mtfprwz f0, r3 ; CHECK-BE-NEXT: xscvuxdsp f0, f0 ; CHECK-BE-NEXT: xscvdpspn v2, f0 -; CHECK-BE-NEXT: vmrghw v2, v2, v3 +; CHECK-BE-NEXT: vmrgow v2, v2, v3 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -101,7 +101,7 @@ ; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l ; CHECK-BE-NEXT: lxvx v3, 0, r3 -; CHECK-BE-NEXT: vperm v2, v2, v4, v3 +; CHECK-BE-NEXT: vperm v2, v4, v2, v3 ; CHECK-BE-NEXT: xvcvuxwsp v2, v2 ; CHECK-BE-NEXT: blr entry: @@ -136,19 +136,13 @@ ; ; CHECK-BE-LABEL: test8elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: addis r4, r2, .LCPI2_0@toc@ha -; CHECK-BE-NEXT: xxlxor v4, v4, v4 -; CHECK-BE-NEXT: addi r4, r4, .LCPI2_0@toc@l -; CHECK-BE-NEXT: lxvx v3, 0, r4 -; CHECK-BE-NEXT: addis r4, r2, .LCPI2_1@toc@ha -; CHECK-BE-NEXT: addi r4, r4, .LCPI2_1@toc@l -; CHECK-BE-NEXT: vperm v3, v2, v4, v3 -; CHECK-BE-NEXT: xvcvuxwsp vs0, v3 -; CHECK-BE-NEXT: lxvx v3, 0, r4 -; CHECK-BE-NEXT: vperm v2, v4, v2, v3 -; CHECK-BE-NEXT: stxv vs0, 0(r3) +; CHECK-BE-NEXT: xxlxor v3, v3, v3 +; CHECK-BE-NEXT: vmrghh v4, v3, v2 +; CHECK-BE-NEXT: vmrglh v2, v3, v2 +; CHECK-BE-NEXT: 
xvcvuxwsp vs0, v4 ; CHECK-BE-NEXT: xvcvuxwsp vs1, v2 ; CHECK-BE-NEXT: stxv vs1, 16(r3) +; CHECK-BE-NEXT: stxv vs0, 0(r3) ; CHECK-BE-NEXT: blr entry: %0 = uitofp <8 x i16> %a to <8 x float> @@ -218,21 +212,18 @@ ; CHECK-BE-NEXT: xxlxor v5, v5, v5 ; CHECK-BE-NEXT: addi r4, r4, .LCPI3_0@toc@l ; CHECK-BE-NEXT: lxvx v4, 0, r4 -; CHECK-BE-NEXT: addis r4, r2, .LCPI3_1@toc@ha -; CHECK-BE-NEXT: addi r4, r4, .LCPI3_1@toc@l -; CHECK-BE-NEXT: vperm v0, v3, v5, v4 +; CHECK-BE-NEXT: vperm v0, v5, v3, v4 +; CHECK-BE-NEXT: vperm v4, v5, v2, v4 +; CHECK-BE-NEXT: vmrglh v3, v5, v3 +; CHECK-BE-NEXT: vmrglh v2, v5, v2 ; CHECK-BE-NEXT: xvcvuxwsp vs0, v0 -; CHECK-BE-NEXT: lxvx v0, 0, r4 -; CHECK-BE-NEXT: vperm v3, v5, v3, v0 -; CHECK-BE-NEXT: stxv vs0, 0(r3) -; CHECK-BE-NEXT: xvcvuxwsp vs1, v3 -; CHECK-BE-NEXT: vperm v3, v2, v5, v4 -; CHECK-BE-NEXT: vperm v2, v5, v2, v0 +; CHECK-BE-NEXT: xvcvuxwsp vs1, v4 +; CHECK-BE-NEXT: stxv vs1, 32(r3) ; CHECK-BE-NEXT: xvcvuxwsp vs2, v3 ; CHECK-BE-NEXT: xvcvuxwsp vs3, v2 -; CHECK-BE-NEXT: stxv vs1, 16(r3) ; CHECK-BE-NEXT: stxv vs3, 48(r3) -; CHECK-BE-NEXT: stxv vs2, 32(r3) +; CHECK-BE-NEXT: stxv vs2, 16(r3) +; CHECK-BE-NEXT: stxv vs0, 0(r3) ; CHECK-BE-NEXT: blr entry: %a = load <16 x i16>, <16 x i16>* %0, align 32 @@ -299,7 +290,7 @@ ; CHECK-BE-NEXT: mtfprwa f0, r3 ; CHECK-BE-NEXT: xscvsxdsp f0, f0 ; CHECK-BE-NEXT: xscvdpspn v2, f0 -; CHECK-BE-NEXT: vmrghw v2, v2, v3 +; CHECK-BE-NEXT: vmrgow v2, v2, v3 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -374,18 +365,15 @@ ; ; CHECK-BE-LABEL: test8elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: addis r4, r2, .LCPI6_0@toc@ha -; CHECK-BE-NEXT: xxlxor v4, v4, v4 -; CHECK-BE-NEXT: addi r4, r4, .LCPI6_0@toc@l -; CHECK-BE-NEXT: lxvx v3, 0, r4 -; CHECK-BE-NEXT: vperm v3, v4, v2, v3 +; CHECK-BE-NEXT: xxlxor v3, v3, v3 +; CHECK-BE-NEXT: vmrglh v3, v3, v2 ; CHECK-BE-NEXT: vmrghh v2, v2, v2 ; CHECK-BE-NEXT: vextsh2w v3, v3 ; CHECK-BE-NEXT: vextsh2w v2, v2 ; CHECK-BE-NEXT: xvcvsxwsp vs0, v3 
; CHECK-BE-NEXT: xvcvsxwsp vs1, v2 -; CHECK-BE-NEXT: stxv vs1, 0(r3) ; CHECK-BE-NEXT: stxv vs0, 16(r3) +; CHECK-BE-NEXT: stxv vs1, 0(r3) ; CHECK-BE-NEXT: blr entry: %0 = sitofp <8 x i16> %a to <8 x float> @@ -451,26 +439,23 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv v2, 16(r4) ; CHECK-BE-NEXT: lxv v3, 0(r4) -; CHECK-BE-NEXT: addis r4, r2, .LCPI7_0@toc@ha -; CHECK-BE-NEXT: xxlxor v5, v5, v5 -; CHECK-BE-NEXT: addi r4, r4, .LCPI7_0@toc@l -; CHECK-BE-NEXT: lxvx v4, 0, r4 -; CHECK-BE-NEXT: vperm v0, v5, v3, v4 -; CHECK-BE-NEXT: vperm v4, v5, v2, v4 +; CHECK-BE-NEXT: xxlxor v4, v4, v4 +; CHECK-BE-NEXT: vmrglh v5, v4, v3 +; CHECK-BE-NEXT: vmrglh v4, v4, v2 ; CHECK-BE-NEXT: vmrghh v3, v3, v3 ; CHECK-BE-NEXT: vmrghh v2, v2, v2 -; CHECK-BE-NEXT: vextsh2w v0, v0 +; CHECK-BE-NEXT: vextsh2w v5, v5 ; CHECK-BE-NEXT: vextsh2w v4, v4 ; CHECK-BE-NEXT: vextsh2w v3, v3 ; CHECK-BE-NEXT: vextsh2w v2, v2 -; CHECK-BE-NEXT: xvcvsxwsp vs0, v0 +; CHECK-BE-NEXT: xvcvsxwsp vs0, v5 ; CHECK-BE-NEXT: xvcvsxwsp vs1, v4 ; CHECK-BE-NEXT: xvcvsxwsp vs2, v3 ; CHECK-BE-NEXT: xvcvsxwsp vs3, v2 -; CHECK-BE-NEXT: stxv vs3, 32(r3) -; CHECK-BE-NEXT: stxv vs2, 0(r3) ; CHECK-BE-NEXT: stxv vs1, 48(r3) +; CHECK-BE-NEXT: stxv vs3, 32(r3) ; CHECK-BE-NEXT: stxv vs0, 16(r3) +; CHECK-BE-NEXT: stxv vs2, 0(r3) ; CHECK-BE-NEXT: blr entry: %a = load <16 x i16>, <16 x i16>* %0, align 32 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll @@ -39,7 +39,7 @@ ; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: addi r3, r3, .LCPI0_0@toc@l ; CHECK-BE-NEXT: lxvx v3, 0, r3 -; CHECK-BE-NEXT: vperm v2, v2, v4, v3 +; CHECK-BE-NEXT: vperm v2, v4, v2, v3 ; CHECK-BE-NEXT: xvcvuxddp v2, v2 ; CHECK-BE-NEXT: blr entry: @@ -97,7 +97,7 @@ ; CHECK-BE-NEXT: lxvx v3, 0, r4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI1_1@toc@ha ; CHECK-BE-NEXT: addi 
r4, r4, .LCPI1_1@toc@l -; CHECK-BE-NEXT: vperm v3, v2, v4, v3 +; CHECK-BE-NEXT: vperm v3, v4, v2, v3 ; CHECK-BE-NEXT: xvcvuxddp vs0, v3 ; CHECK-BE-NEXT: lxvx v3, 0, r4 ; CHECK-BE-NEXT: vperm v2, v4, v2, v3 @@ -186,7 +186,7 @@ ; CHECK-BE-NEXT: lxvx v3, 0, r4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI2_1@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI2_1@toc@l -; CHECK-BE-NEXT: vperm v3, v2, v4, v3 +; CHECK-BE-NEXT: vperm v3, v4, v2, v3 ; CHECK-BE-NEXT: xvcvuxddp vs0, v3 ; CHECK-BE-NEXT: lxvx v3, 0, r4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI2_2@toc@ha @@ -324,7 +324,7 @@ ; CHECK-BE-NEXT: lxvx v4, 0, r4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI3_1@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI3_1@toc@l -; CHECK-BE-NEXT: vperm v0, v3, v5, v4 +; CHECK-BE-NEXT: vperm v0, v5, v3, v4 ; CHECK-BE-NEXT: xvcvuxddp vs0, v0 ; CHECK-BE-NEXT: lxvx v0, 0, r4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI3_2@toc@ha @@ -342,7 +342,7 @@ ; CHECK-BE-NEXT: vperm v3, v5, v3, v6 ; CHECK-BE-NEXT: stxv vs2, 32(r3) ; CHECK-BE-NEXT: xvcvuxddp vs3, v3 -; CHECK-BE-NEXT: vperm v3, v2, v5, v4 +; CHECK-BE-NEXT: vperm v3, v5, v2, v4 ; CHECK-BE-NEXT: xvcvuxddp vs4, v3 ; CHECK-BE-NEXT: vperm v3, v5, v2, v0 ; CHECK-BE-NEXT: xvcvuxddp vs5, v3 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll @@ -47,7 +47,7 @@ ; CHECK-BE-NEXT: xscvuxdsp f0, f0 ; CHECK-BE-NEXT: xscvdpspn v2, f1 ; CHECK-BE-NEXT: xscvdpspn v3, f0 -; CHECK-BE-NEXT: vmrghw v2, v2, v3 +; CHECK-BE-NEXT: vmrgow v2, v2, v3 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -340,7 +340,7 @@ ; CHECK-BE-NEXT: xscvsxdsp f0, f0 ; CHECK-BE-NEXT: xscvdpspn v2, f1 ; CHECK-BE-NEXT: xscvdpspn v3, f0 -; CHECK-BE-NEXT: vmrghw v2, v2, v3 +; CHECK-BE-NEXT: vmrgow v2, v2, v3 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: diff --git 
a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll @@ -67,7 +67,7 @@ ; CHECK-BE-NEXT: mtfprwz f0, r3 ; CHECK-BE-NEXT: xscvuxdsp f0, f0 ; CHECK-BE-NEXT: xscvdpspn v2, f0 -; CHECK-BE-NEXT: vmrghw v2, v2, v3 +; CHECK-BE-NEXT: vmrgow v2, v2, v3 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -107,7 +107,7 @@ ; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: addi r3, r3, .LCPI1_0@toc@l ; CHECK-BE-NEXT: lxvx v3, 0, r3 -; CHECK-BE-NEXT: vperm v2, v2, v4, v3 +; CHECK-BE-NEXT: vperm v2, v4, v2, v3 ; CHECK-BE-NEXT: xvcvuxwsp v2, v2 ; CHECK-BE-NEXT: blr entry: @@ -163,7 +163,7 @@ ; CHECK-BE-NEXT: lxvx v3, 0, r4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI2_1@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI2_1@toc@l -; CHECK-BE-NEXT: vperm v3, v2, v4, v3 +; CHECK-BE-NEXT: vperm v3, v4, v2, v3 ; CHECK-BE-NEXT: xvcvuxwsp vs0, v3 ; CHECK-BE-NEXT: lxvx v3, 0, r4 ; CHECK-BE-NEXT: vperm v2, v4, v2, v3 @@ -248,7 +248,7 @@ ; CHECK-BE-NEXT: lxvx v3, 0, r4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI3_1@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI3_1@toc@l -; CHECK-BE-NEXT: vperm v3, v2, v4, v3 +; CHECK-BE-NEXT: vperm v3, v4, v2, v3 ; CHECK-BE-NEXT: xvcvuxwsp vs0, v3 ; CHECK-BE-NEXT: lxvx v3, 0, r4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI3_2@toc@ha @@ -332,7 +332,7 @@ ; CHECK-BE-NEXT: mtfprwa f0, r3 ; CHECK-BE-NEXT: xscvsxdsp f0, f0 ; CHECK-BE-NEXT: xscvdpspn v2, f0 -; CHECK-BE-NEXT: vmrghw v2, v2, v3 +; CHECK-BE-NEXT: vmrgow v2, v2, v3 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll @@ -39,7 +39,7 @@ ; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: addi r3, r3, 
.LCPI0_0@toc@l ; CHECK-BE-NEXT: lxvx v3, 0, r3 -; CHECK-BE-NEXT: vperm v2, v2, v4, v3 +; CHECK-BE-NEXT: vperm v2, v4, v2, v3 ; CHECK-BE-NEXT: xvcvuxddp v2, v2 ; CHECK-BE-NEXT: blr entry: @@ -97,7 +97,7 @@ ; CHECK-BE-NEXT: lxvx v3, 0, r4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI1_1@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI1_1@toc@l -; CHECK-BE-NEXT: vperm v3, v2, v4, v3 +; CHECK-BE-NEXT: vperm v3, v4, v2, v3 ; CHECK-BE-NEXT: xvcvuxddp vs0, v3 ; CHECK-BE-NEXT: lxvx v3, 0, r4 ; CHECK-BE-NEXT: vperm v2, v4, v2, v3 @@ -189,7 +189,7 @@ ; CHECK-BE-NEXT: lxvx v3, 0, r4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI2_1@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI2_1@toc@l -; CHECK-BE-NEXT: vperm v3, v2, v4, v3 +; CHECK-BE-NEXT: vperm v3, v4, v2, v3 ; CHECK-BE-NEXT: xvcvuxddp vs0, v3 ; CHECK-BE-NEXT: lxvx v3, 0, r4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI2_2@toc@ha @@ -346,7 +346,7 @@ ; CHECK-BE-NEXT: lxvx v3, 0, r4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI3_1@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI3_1@toc@l -; CHECK-BE-NEXT: vperm v3, v2, v4, v3 +; CHECK-BE-NEXT: vperm v3, v4, v2, v3 ; CHECK-BE-NEXT: xvcvuxddp vs0, v3 ; CHECK-BE-NEXT: lxvx v3, 0, r4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI3_2@toc@ha diff --git a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll --- a/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll +++ b/llvm/test/CodeGen/PowerPC/vec_insert_elt.ll @@ -354,7 +354,6 @@ ; CHECK-BE-LABEL: testFloatImm1: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xscvdpspn vs0, f1 -; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 3 ; CHECK-BE-NEXT: xxinsertw v2, vs0, 0 ; CHECK-BE-NEXT: xxinsertw v2, vs0, 8 ; CHECK-BE-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/vec_int_ext.ll b/llvm/test/CodeGen/PowerPC/vec_int_ext.ll --- a/llvm/test/CodeGen/PowerPC/vec_int_ext.ll +++ b/llvm/test/CodeGen/PowerPC/vec_int_ext.ll @@ -345,37 +345,32 @@ ; CHECK-BE-NEXT: li 6, 6 ; CHECK-BE-NEXT: extsb 9, 9 ; CHECK-BE-NEXT: extsb 10, 10 -; CHECK-BE-NEXT: extsb 7, 7 -; CHECK-BE-NEXT: extsb 8, 8 
; CHECK-BE-NEXT: vextublx 3, 3, 2 ; CHECK-BE-NEXT: vextublx 4, 4, 2 +; CHECK-BE-NEXT: vextublx 5, 5, 2 +; CHECK-BE-NEXT: extsb 7, 7 +; CHECK-BE-NEXT: extsb 8, 8 +; CHECK-BE-NEXT: extsb 5, 5 ; CHECK-BE-NEXT: extsb 3, 3 ; CHECK-BE-NEXT: extsb 4, 4 -; CHECK-BE-NEXT: sldi 10, 10, 48 -; CHECK-BE-NEXT: sldi 9, 9, 48 -; CHECK-BE-NEXT: vextublx 5, 5, 2 +; CHECK-BE-NEXT: mtvsrwz 35, 9 +; CHECK-BE-NEXT: addis 9, 2, .LCPI11_0@toc@ha ; CHECK-BE-NEXT: vextublx 6, 6, 2 -; CHECK-BE-NEXT: sldi 8, 8, 48 -; CHECK-BE-NEXT: sldi 7, 7, 48 -; CHECK-BE-NEXT: extsb 5, 5 +; CHECK-BE-NEXT: mtvsrwz 34, 10 +; CHECK-BE-NEXT: mtvsrwz 37, 7 ; CHECK-BE-NEXT: extsb 6, 6 -; CHECK-BE-NEXT: sldi 6, 6, 48 -; CHECK-BE-NEXT: sldi 5, 5, 48 -; CHECK-BE-NEXT: sldi 4, 4, 48 -; CHECK-BE-NEXT: sldi 3, 3, 48 -; CHECK-BE-NEXT: mtvsrd 34, 10 -; CHECK-BE-NEXT: mtvsrd 35, 9 -; CHECK-BE-NEXT: mtvsrd 36, 7 -; CHECK-BE-NEXT: mtvsrd 37, 3 -; CHECK-BE-NEXT: vmrghh 2, 3, 2 -; CHECK-BE-NEXT: mtvsrd 35, 8 -; CHECK-BE-NEXT: vmrghh 3, 4, 3 -; CHECK-BE-NEXT: mtvsrd 36, 5 +; CHECK-BE-NEXT: mtvsrwz 32, 3 +; CHECK-BE-NEXT: addi 9, 9, .LCPI11_0@toc@l +; CHECK-BE-NEXT: lxvx 36, 0, 9 +; CHECK-BE-NEXT: vperm 2, 3, 2, 4 +; CHECK-BE-NEXT: mtvsrwz 35, 8 +; CHECK-BE-NEXT: vperm 3, 5, 3, 4 +; CHECK-BE-NEXT: mtvsrwz 37, 5 ; CHECK-BE-NEXT: vmrghw 2, 3, 2 -; CHECK-BE-NEXT: mtvsrd 35, 6 -; CHECK-BE-NEXT: vmrghh 3, 4, 3 -; CHECK-BE-NEXT: mtvsrd 36, 4 -; CHECK-BE-NEXT: vmrghh 4, 5, 4 +; CHECK-BE-NEXT: mtvsrwz 35, 6 +; CHECK-BE-NEXT: vperm 3, 5, 3, 4 +; CHECK-BE-NEXT: mtvsrwz 37, 4 +; CHECK-BE-NEXT: vperm 4, 0, 5, 4 ; CHECK-BE-NEXT: vmrghw 3, 4, 3 ; CHECK-BE-NEXT: xxmrghd 34, 35, 34 ; CHECK-BE-NEXT: blr