Index: lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- lib/Target/PowerPC/PPCISelLowering.h +++ lib/Target/PowerPC/PPCISelLowering.h @@ -446,7 +446,11 @@ /// a VMRGEW or VMRGOW instruction bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG); - + /// isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable + /// for a XXSLDWI instruction. + bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, + bool &Swap, bool IsLE); + /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the /// shift amount, otherwise return -1. int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -1573,9 +1573,8 @@ return true; } -bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, - unsigned &InsertAtByte, bool &Swap, bool IsLE) { // Check that the mask is shuffling words +static bool isWordShuffleMask(ShuffleVectorSDNode *N) { for (unsigned i = 0; i < 4; ++i) { unsigned B0 = N->getMaskElt(i*4); unsigned B1 = N->getMaskElt(i*4+1); @@ -1587,6 +1586,14 @@ return false; } + return true; +} + +bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, + unsigned &InsertAtByte, bool &Swap, bool IsLE) { + if (!isWordShuffleMask(N)) + return false; + // Now we look at mask elements 0,4,8,12 unsigned M0 = N->getMaskElt(0) / 4; unsigned M1 = N->getMaskElt(4) / 4; @@ -1657,6 +1664,63 @@ return false; } +bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, + bool &Swap, bool IsLE) { + if (N->getValueType(0) != MVT::v16i8) + return false; + + // Ensure each byte index of the word is consecutive. 
+ if (!isWordShuffleMask(N)) + return false; + + // Now we look at mask elements 0,4,8,12, which are the beginning of words. + unsigned M0 = N->getMaskElt(0) / 4; + unsigned M1 = N->getMaskElt(4) / 4; + unsigned M2 = N->getMaskElt(8) / 4; + unsigned M3 = N->getMaskElt(12) / 4; + + // If both vector operands for the shuffle are the same vector, the mask will + // contain only elements from the first one and the second one will be undef. + if (N->getOperand(1).isUndef()) { + if (M0 != 0 && M0 != 1 && M0 != 2 && M0 != 3) + return false; + + if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4) + return false; + + ShiftElts = IsLE ? (4 - M0) % 4 : M0; + Swap = false; + return true; + } + + // Ensure each word index of the ShuffleVector Mask is consecutive. + if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8) + return false; + + if (IsLE) { + if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) { + Swap = false; + ShiftElts = (8 - M0) % 8; + } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) { + Swap = true; + ShiftElts = (4 - M0) % 4; + } + + return true; + } else { // BE + if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) { + Swap = false; + ShiftElts = M0; + } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) { + Swap = true; + ShiftElts = M0 - 4; + } + + return true; + } +} + + /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, @@ -7656,6 +7720,20 @@ return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); } + + if (Subtarget.hasVSX() && + PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) { + if (Swap) + std::swap(V1, V2); + SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); + SDValue Conv2 = + DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? 
V1 : V2); + + SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2, + DAG.getConstant(ShiftElts, dl, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl); + } + if (Subtarget.hasVSX()) { if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) { int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG); Index: lib/Target/PowerPC/PPCInstrInfo.td =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.td +++ lib/Target/PowerPC/PPCInstrInfo.td @@ -46,7 +46,7 @@ ]>; def SDT_PPCVecShift : SDTypeProfile<1, 3, [ SDTCisVec<0>, - SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3> + SDTCisVec<1>, SDTCisVec<2>, SDTCisPtrTy<3> ]>; def SDT_PPCVecInsert : SDTypeProfile<1, 3, [ SDTCisVec<0>, Index: lib/Target/PowerPC/PPCInstrVSX.td =================================================================== --- lib/Target/PowerPC/PPCInstrVSX.td +++ lib/Target/PowerPC/PPCInstrVSX.td @@ -1066,6 +1066,10 @@ def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>; def : Pat<(v2f64 (PPCswapNoChain v2f64:$src)), (XXPERMDI $src, $src, 2)>; +// PPCvecshl XT, XA, XA, 2 can be selected to both XXSLDWI XT,XA,XA,2 and +// XXSWAPD XT,XA (i.e. XXPERMDI XT,XA,XA,2), the latter one is more profitable. +def : Pat<(v4i32 (PPCvecshl v4i32:$src, v4i32:$src, 2)), (XXPERMDI $src, $src, 2)>; + // Selects. 
def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)), (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>; Index: test/CodeGen/PowerPC/p8altivec-shuffles-pred.ll =================================================================== --- test/CodeGen/PowerPC/p8altivec-shuffles-pred.ll +++ test/CodeGen/PowerPC/p8altivec-shuffles-pred.ll @@ -21,7 +21,7 @@ ret <16 x i8> %strided.vec ; CHECK-LABEL: @test2 -; CHECK: vsldoi 2, 2, 2, 12 +; CHECK: xxsldwi 34, 34, 34, 3 ; CHECK: blr } Index: test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll =================================================================== --- test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll +++ test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll @@ -6,7 +6,7 @@ define <4 x float> @_Z7testInsILj0ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { entry: ; CHECK-LABEL: _Z7testInsILj0ELj0EDv4_fET1_S1_S1_ -; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxswapd 0, 35 ; CHECK: xxinsertw 34, 0, 12 ; CHECK-BE-LABEL: _Z7testInsILj0ELj0EDv4_fET1_S1_S1_ ; CHECK-BE: xxsldwi 0, 35, 35, 3 @@ -45,7 +45,7 @@ ; CHECK: xxsldwi 0, 35, 35, 3 ; CHECK: xxinsertw 34, 0, 12 ; CHECK-BE-LABEL: _Z7testInsILj0ELj3EDv4_fET1_S1_S1_ -; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxswapd 0, 35 ; CHECK-BE: xxinsertw 34, 0, 0 %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> ret <4 x float> %vecins @@ -54,7 +54,7 @@ define <4 x float> @_Z7testInsILj1ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { entry: ; CHECK-LABEL: _Z7testInsILj1ELj0EDv4_fET1_S1_S1_ -; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxswapd 0, 35 ; CHECK: xxinsertw 34, 0, 8 ; CHECK-BE-LABEL: _Z7testInsILj1ELj0EDv4_fET1_S1_S1_ ; CHECK-BE: xxsldwi 0, 35, 35, 3 @@ -93,7 +93,7 @@ ; CHECK: xxsldwi 0, 35, 35, 3 ; CHECK: xxinsertw 34, 0, 8 ; CHECK-BE-LABEL: _Z7testInsILj1ELj3EDv4_fET1_S1_S1_ -; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxswapd 0, 35 ; CHECK-BE: xxinsertw 34, 0, 4 %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> ret 
<4 x float> %vecins @@ -102,7 +102,7 @@ define <4 x float> @_Z7testInsILj2ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { entry: ; CHECK-LABEL: _Z7testInsILj2ELj0EDv4_fET1_S1_S1_ -; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxswapd 0, 35 ; CHECK: xxinsertw 34, 0, 4 ; CHECK-BE-LABEL: _Z7testInsILj2ELj0EDv4_fET1_S1_S1_ ; CHECK-BE: xxsldwi 0, 35, 35, 3 @@ -141,7 +141,7 @@ ; CHECK: xxsldwi 0, 35, 35, 3 ; CHECK: xxinsertw 34, 0, 4 ; CHECK-BE-LABEL: _Z7testInsILj2ELj3EDv4_fET1_S1_S1_ -; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxswapd 0, 35 ; CHECK-BE: xxinsertw 34, 0, 8 %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> ret <4 x float> %vecins @@ -150,7 +150,7 @@ define <4 x float> @_Z7testInsILj3ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { entry: ; CHECK-LABEL: _Z7testInsILj3ELj0EDv4_fET1_S1_S1_ -; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxswapd 0, 35 ; CHECK: xxinsertw 34, 0, 0 ; CHECK-BE-LABEL: _Z7testInsILj3ELj0EDv4_fET1_S1_S1_ ; CHECK-BE: xxsldwi 0, 35, 35, 3 @@ -189,7 +189,7 @@ ; CHECK: xxsldwi 0, 35, 35, 3 ; CHECK: xxinsertw 34, 0, 0 ; CHECK-BE-LABEL: _Z7testInsILj3ELj3EDv4_fET1_S1_S1_ -; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxswapd 0, 35 ; CHECK-BE: xxinsertw 34, 0, 12 %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> ret <4 x float> %vecins @@ -198,7 +198,7 @@ define <4 x i32> @_Z7testInsILj0ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { entry: ; CHECK-LABEL: _Z7testInsILj0ELj0EDv4_jET1_S1_S1_ -; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxswapd 0, 35 ; CHECK: xxinsertw 34, 0, 12 ; CHECK-BE-LABEL: _Z7testInsILj0ELj0EDv4_jET1_S1_S1_ ; CHECK-BE: xxsldwi 0, 35, 35, 3 @@ -237,7 +237,7 @@ ; CHECK: xxsldwi 0, 35, 35, 3 ; CHECK: xxinsertw 34, 0, 12 ; CHECK-BE-LABEL: _Z7testInsILj0ELj3EDv4_jET1_S1_S1_ -; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxswapd 0, 35 ; CHECK-BE: xxinsertw 34, 0, 0 %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %vecins @@ -246,7 +246,7 @@ define <4 x i32> 
@_Z7testInsILj1ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { entry: ; CHECK-LABEL: _Z7testInsILj1ELj0EDv4_jET1_S1_S1_ -; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxswapd 0, 35 ; CHECK: xxinsertw 34, 0, 8 ; CHECK-BE-LABEL: _Z7testInsILj1ELj0EDv4_jET1_S1_S1_ ; CHECK-BE: xxsldwi 0, 35, 35, 3 @@ -285,7 +285,7 @@ ; CHECK: xxsldwi 0, 35, 35, 3 ; CHECK: xxinsertw 34, 0, 8 ; CHECK-BE-LABEL: _Z7testInsILj1ELj3EDv4_jET1_S1_S1_ -; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxswapd 0, 35 ; CHECK-BE: xxinsertw 34, 0, 4 %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %vecins @@ -294,7 +294,7 @@ define <4 x i32> @_Z7testInsILj2ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { entry: ; CHECK-LABEL: _Z7testInsILj2ELj0EDv4_jET1_S1_S1_ -; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxswapd 0, 35 ; CHECK: xxinsertw 34, 0, 4 ; CHECK-BE-LABEL: _Z7testInsILj2ELj0EDv4_jET1_S1_S1_ ; CHECK-BE: xxsldwi 0, 35, 35, 3 @@ -333,7 +333,7 @@ ; CHECK: xxsldwi 0, 35, 35, 3 ; CHECK: xxinsertw 34, 0, 4 ; CHECK-BE-LABEL: _Z7testInsILj2ELj3EDv4_jET1_S1_S1_ -; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxswapd 0, 35 ; CHECK-BE: xxinsertw 34, 0, 8 %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %vecins @@ -342,7 +342,7 @@ define <4 x i32> @_Z7testInsILj3ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { entry: ; CHECK-LABEL: _Z7testInsILj3ELj0EDv4_jET1_S1_S1_ -; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxswapd 0, 35 ; CHECK: xxinsertw 34, 0, 0 ; CHECK-BE-LABEL: _Z7testInsILj3ELj0EDv4_jET1_S1_S1_ ; CHECK-BE: xxsldwi 0, 35, 35, 3 @@ -381,7 +381,7 @@ ; CHECK: xxsldwi 0, 35, 35, 3 ; CHECK: xxinsertw 34, 0, 0 ; CHECK-BE-LABEL: _Z7testInsILj3ELj3EDv4_jET1_S1_S1_ -; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxswapd 0, 35 ; CHECK-BE: xxinsertw 34, 0, 12 %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ret <4 x i32> %vecins @@ -546,7 +546,7 @@ define <4 x float> @_Z7testInsILj0ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { entry: ; 
CHECK-LABEL: _Z7testInsILj0ELj0EDv4_fET1_S1_S1_ -; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxswapd 0, 35 ; CHECK: xxinsertw 34, 0, 12 ; CHECK-BE-LABEL: _Z7testInsILj0ELj0EDv4_fET1_S1_S1_ ; CHECK-BE: xxsldwi 0, 35, 35, 3 @@ -585,7 +585,7 @@ ; CHECK: xxsldwi 0, 35, 35, 3 ; CHECK: xxinsertw 34, 0, 12 ; CHECK-BE-LABEL: _Z7testInsILj0ELj3EDv4_fET1_S1_S1_ -; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxswapd 0, 35 ; CHECK-BE: xxinsertw 34, 0, 0 %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> ret <4 x float> %vecins @@ -594,7 +594,7 @@ define <4 x float> @_Z7testInsILj1ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { entry: ; CHECK-LABEL: _Z7testInsILj1ELj0EDv4_fET1_S1_S1_ -; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxswapd 0, 35 ; CHECK: xxinsertw 34, 0, 8 ; CHECK-BE-LABEL: _Z7testInsILj1ELj0EDv4_fET1_S1_S1_ ; CHECK-BE: xxsldwi 0, 35, 35, 3 @@ -633,7 +633,7 @@ ; CHECK: xxsldwi 0, 35, 35, 3 ; CHECK: xxinsertw 34, 0, 8 ; CHECK-BE-LABEL: _Z7testInsILj1ELj3EDv4_fET1_S1_S1_ -; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxswapd 0, 35 ; CHECK-BE: xxinsertw 34, 0, 4 %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> ret <4 x float> %vecins @@ -642,7 +642,7 @@ define <4 x float> @_Z7testInsILj2ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { entry: ; CHECK-LABEL: _Z7testInsILj2ELj0EDv4_fET1_S1_S1_ -; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxswapd 0, 35 ; CHECK: xxinsertw 34, 0, 4 ; CHECK-BE-LABEL: _Z7testInsILj2ELj0EDv4_fET1_S1_S1_ ; CHECK-BE: xxsldwi 0, 35, 35, 3 @@ -681,7 +681,7 @@ ; CHECK: xxsldwi 0, 35, 35, 3 ; CHECK: xxinsertw 34, 0, 4 ; CHECK-BE-LABEL: _Z7testInsILj2ELj3EDv4_fET1_S1_S1_ -; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxswapd 0, 35 ; CHECK-BE: xxinsertw 34, 0, 8 %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> ret <4 x float> %vecins @@ -690,7 +690,7 @@ define <4 x float> @_Z7testInsILj3ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { entry: ; CHECK-LABEL: _Z7testInsILj3ELj0EDv4_fET1_S1_S1_ -; 
CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxswapd 0, 35 ; CHECK: xxinsertw 34, 0, 0 ; CHECK-BE-LABEL: _Z7testInsILj3ELj0EDv4_fET1_S1_S1_ ; CHECK-BE: xxsldwi 0, 35, 35, 3 @@ -729,7 +729,7 @@ ; CHECK: xxsldwi 0, 35, 35, 3 ; CHECK: xxinsertw 34, 0, 0 ; CHECK-BE-LABEL: _Z7testInsILj3ELj3EDv4_fET1_S1_S1_ -; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxswapd 0, 35 ; CHECK-BE: xxinsertw 34, 0, 12 %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> ret <4 x float> %vecins @@ -738,7 +738,7 @@ define <4 x i32> @_Z7testInsILj0ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { entry: ; CHECK-LABEL: _Z7testInsILj0ELj0EDv4_jET1_S1_S1_ -; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxswapd 0, 35 ; CHECK: xxinsertw 34, 0, 12 ; CHECK-BE-LABEL: _Z7testInsILj0ELj0EDv4_jET1_S1_S1_ ; CHECK-BE: xxsldwi 0, 35, 35, 3 @@ -777,7 +777,7 @@ ; CHECK: xxsldwi 0, 35, 35, 3 ; CHECK: xxinsertw 34, 0, 12 ; CHECK-BE-LABEL: _Z7testInsILj0ELj3EDv4_jET1_S1_S1_ -; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxswapd 0, 35 ; CHECK-BE: xxinsertw 34, 0, 0 %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> ret <4 x i32> %vecins @@ -786,7 +786,7 @@ define <4 x i32> @_Z7testInsILj1ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { entry: ; CHECK-LABEL: _Z7testInsILj1ELj0EDv4_jET1_S1_S1_ -; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxswapd 0, 35 ; CHECK: xxinsertw 34, 0, 8 ; CHECK-BE-LABEL: _Z7testInsILj1ELj0EDv4_jET1_S1_S1_ ; CHECK-BE: xxsldwi 0, 35, 35, 3 @@ -825,7 +825,7 @@ ; CHECK: xxsldwi 0, 35, 35, 3 ; CHECK: xxinsertw 34, 0, 8 ; CHECK-BE-LABEL: _Z7testInsILj1ELj3EDv4_jET1_S1_S1_ -; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxswapd 0, 35 ; CHECK-BE: xxinsertw 34, 0, 4 %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> ret <4 x i32> %vecins @@ -834,7 +834,7 @@ define <4 x i32> @_Z7testInsILj2ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { entry: ; CHECK-LABEL: _Z7testInsILj2ELj0EDv4_jET1_S1_S1_ -; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxswapd 0, 35 ; CHECK: xxinsertw 34, 0, 4 
; CHECK-BE-LABEL: _Z7testInsILj2ELj0EDv4_jET1_S1_S1_ ; CHECK-BE: xxsldwi 0, 35, 35, 3 @@ -873,7 +873,7 @@ ; CHECK: xxsldwi 0, 35, 35, 3 ; CHECK: xxinsertw 34, 0, 4 ; CHECK-BE-LABEL: _Z7testInsILj2ELj3EDv4_jET1_S1_S1_ -; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxswapd 0, 35 ; CHECK-BE: xxinsertw 34, 0, 8 %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> ret <4 x i32> %vecins @@ -882,7 +882,7 @@ define <4 x i32> @_Z7testInsILj3ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { entry: ; CHECK-LABEL: _Z7testInsILj3ELj0EDv4_jET1_S1_S1_ -; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxswapd 0, 35 ; CHECK: xxinsertw 34, 0, 0 ; CHECK-BE-LABEL: _Z7testInsILj3ELj0EDv4_jET1_S1_S1_ ; CHECK-BE: xxsldwi 0, 35, 35, 3 @@ -921,7 +921,7 @@ ; CHECK: xxsldwi 0, 35, 35, 3 ; CHECK: xxinsertw 34, 0, 0 ; CHECK-BE-LABEL: _Z7testInsILj3ELj3EDv4_jET1_S1_S1_ -; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxswapd 0, 35 ; CHECK-BE: xxinsertw 34, 0, 12 %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> ret <4 x i32> %vecins Index: test/CodeGen/PowerPC/pr27078.ll =================================================================== --- test/CodeGen/PowerPC/pr27078.ll +++ test/CodeGen/PowerPC/pr27078.ll @@ -9,11 +9,11 @@ %6 = shufflevector <12 x float> %5, <12 x float> undef, <4 x i32> ret <4 x float> %6 -; CHECK: vsldoi +; CHECK: xxsldwi ; CHECK-NEXT: vmrghw ; CHECK-NEXT: vmrglw -; CHECK-NEXT: vsldoi -; CHECK-NEXT: vsldoi -; CHECK-NEXT: vsldoi +; CHECK-NEXT: xxsldwi +; CHECK-NEXT: xxsldwi +; CHECK-NEXT: xxsldwi ; CHECK-NEXT: blr } Index: test/CodeGen/PowerPC/vec_sldwi.ll =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/vec_sldwi.ll @@ -0,0 +1,307 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | \ +; RUN: FileCheck %s -check-prefix=CHECK-LE +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | \ +; RUN: FileCheck %s -check-prefix=CHECK-BE + 
+; Possible LE ShuffleVector masks (Case 1): +; ShuffleVector((vector int)a, vector(int)b, 0, 1, 2, 3) +; ShuffleVector((vector int)a, vector(int)b, 7, 0, 1, 2) +; ShuffleVector((vector int)a, vector(int)b, 6, 7, 0, 1) +; ShuffleVector((vector int)a, vector(int)b, 5, 6, 7, 0) +; which targets at: +; xxsldwi a, b, 0 +; xxsldwi a, b, 1 +; xxsldwi a, b, 2 +; xxsldwi a, b, 3 +; Possible LE Swap ShuffleVector masks (Case 2): +; ShuffleVector((vector int)a, vector(int)b, 4, 5, 6, 7) +; ShuffleVector((vector int)a, vector(int)b, 3, 4, 5, 6) +; ShuffleVector((vector int)a, vector(int)b, 2, 3, 4, 5) +; ShuffleVector((vector int)a, vector(int)b, 1, 2, 3, 4) +; which targets at: +; xxsldwi b, a, 0 +; xxsldwi b, a, 1 +; xxsldwi b, a, 2 +; xxsldwi b, a, 3 +; Possible LE ShuffleVector masks when a == b, b is undef (Case 3): +; ShuffleVector((vector int)a, vector(int)a, 0, 1, 2, 3) +; ShuffleVector((vector int)a, vector(int)a, 3, 0, 1, 2) +; ShuffleVector((vector int)a, vector(int)a, 2, 3, 0, 1) +; ShuffleVector((vector int)a, vector(int)a, 1, 2, 3, 0) +; which targets at: +; xxsldwi a, a, 0 +; xxsldwi a, a, 1 +; xxsldwi a, a, 2 +; xxsldwi a, a, 3 + +; Possible BE ShuffleVector masks (Case 4): +; ShuffleVector((vector int)a, vector(int)b, 0, 1, 2, 3) +; ShuffleVector((vector int)a, vector(int)b, 1, 2, 3, 4) +; ShuffleVector((vector int)a, vector(int)b, 2, 3, 4, 5) +; ShuffleVector((vector int)a, vector(int)b, 3, 4, 5, 6) +; which targets at: +; xxsldwi a, b, 0 +; xxsldwi a, b, 1 +; xxsldwi a, b, 2 +; xxsldwi a, b, 3 +; Possible BE Swap ShuffleVector masks (Case 5): +; ShuffleVector((vector int)a, vector(int)b, 4, 5, 6, 7) +; ShuffleVector((vector int)a, vector(int)b, 5, 6, 7, 0) +; ShuffleVector((vector int)a, vector(int)b, 6, 7, 0, 1) +; ShuffleVector((vector int)a, vector(int)b, 7, 0, 1, 2) +; which targets at: +; xxsldwi b, a, 0 +; xxsldwi b, a, 1 +; xxsldwi b, a, 2 +; xxsldwi b, a, 3 +; Possible BE ShuffleVector masks when a == b, b is undef (Case 6): +; ShuffleVector((vector 
int)a, vector(int)a, 0, 1, 2, 3) +; ShuffleVector((vector int)a, vector(int)a, 1, 2, 3, 0) +; ShuffleVector((vector int)a, vector(int)a, 2, 3, 0, 1) +; ShuffleVector((vector int)a, vector(int)a, 3, 0, 1, 2) +; which targets at: +; xxsldwi a, a, 0 +; xxsldwi a, a, 1 +; xxsldwi a, a, 2 +; xxsldwi a, a, 3 + +define <4 x i32> @check_le_vec_sldwi_va_vb_0(<4 x i32> %VA, <4 x i32> %VB) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> + ret <4 x i32> %0 +; CHECK-LE-LABEL: @check_le_vec_sldwi_va_vb_0 +; CHECK-LE: blr +} + +define <4 x i32> @check_le_vec_sldwi_va_vb_1(<4 x i32> %VA, <4 x i32> %VB) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> + ret <4 x i32> %0 +; CHECK-LE-LABEL: @check_le_vec_sldwi_va_vb_1 +; CHECK-LE: xxsldwi 34, 34, 35, 1 +; CHECK-LE: blr +} + +define <4 x i32> @check_le_vec_sldwi_va_vb_2(<4 x i32> %VA, <4 x i32> %VB) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> + ret <4 x i32> %0 +; CHECK-LE-LABEL: @check_le_vec_sldwi_va_vb_2 +; CHECK-LE: xxsldwi 34, 34, 35, 2 +; CHECK-LE: blr +} + +define <4 x i32> @check_le_vec_sldwi_va_vb_3(<4 x i32> %VA, <4 x i32> %VB) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> + ret <4 x i32> %0 +; CHECK-LE-LABEL: @check_le_vec_sldwi_va_vb_3 +; CHECK-LE: xxsldwi 34, 34, 35, 3 +; CHECK-LE: blr +} + +define <4 x i32> @check_le_swap_vec_sldwi_va_vb_0(<4 x i32> %VA, <4 x i32> %VB) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> + ret <4 x i32> %0 +; CHECK-LE-LABEL: @check_le_swap_vec_sldwi_va_vb_0 +; CHECK-LE: vmr 2, 3 +; CHECK-LE: blr +} + +define <4 x i32> @check_le_swap_vec_sldwi_va_vb_1(<4 x i32> %VA, <4 x i32> %VB) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> + ret <4 x i32> %0 +; CHECK-LE-LABEL: @check_le_swap_vec_sldwi_va_vb_1 +; CHECK-LE: xxsldwi 34, 35, 34, 1 +; CHECK-LE: blr +} + +define <4 x i32> @check_le_swap_vec_sldwi_va_vb_2(<4 x i32> %VA, <4 x i32> %VB) { +entry: + %0 = 
shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> + ret <4 x i32> %0 +; CHECK-LE-LABEL: @check_le_swap_vec_sldwi_va_vb_2 +; CHECK-LE: xxsldwi 34, 35, 34, 2 +; CHECK-LE: blr +} + +define <4 x i32> @check_le_swap_vec_sldwi_va_vb_3(<4 x i32> %VA, <4 x i32> %VB) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> + ret <4 x i32> %0 +; CHECK-LE-LABEL: @check_le_swap_vec_sldwi_va_vb_3 +; CHECK-LE: xxsldwi 34, 35, 34, 3 +; CHECK-LE: blr +} + +define <4 x i32> @check_le_vec_sldwi_va_undef_0(<4 x i32> %VA) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> undef, <4 x i32> + ret <4 x i32> %0 +; CHECK-LE-LABEL: @check_le_vec_sldwi_va_undef_0 +; CHECK-LE: blr +} + +define <4 x i32> @check_le_vec_sldwi_va_undef_1(<4 x i32> %VA) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> undef, <4 x i32> + ret <4 x i32> %0 +; CHECK-LE-LABEL: @check_le_vec_sldwi_va_undef_1 +; CHECK-LE: xxsldwi 34, 34, 34, 1 +; CHECK-LE: blr +} + +define <4 x i32> @check_le_vec_sldwi_va_undef_2(<4 x i32> %VA) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> undef, <4 x i32> + ret <4 x i32> %0 +; CHECK-LE-LABEL: @check_le_vec_sldwi_va_undef_2 +; CHECK-LE: xxswapd 34, 34 +; CHECK-LE: blr +} + +define <4 x i32> @check_le_vec_sldwi_va_undef_3(<4 x i32> %VA) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> undef, <4 x i32> + ret <4 x i32> %0 +; CHECK-LE-LABEL: @check_le_vec_sldwi_va_undef_3 +; CHECK-LE: xxsldwi 34, 34, 34, 3 +; CHECK-LE: blr +} + +define <4 x i32> @check_be_vec_sldwi_va_vb_0(<4 x i32> %VA, <4 x i32> %VB) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> + ret <4 x i32> %0 +; CHECK-BE-LABEL: @check_be_vec_sldwi_va_vb_0 +; CHECK-BE: blr +} + +define <4 x i32> @check_be_vec_sldwi_va_vb_1(<4 x i32> %VA, <4 x i32> %VB) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> + ret <4 x i32> %0 +; CHECK-BE-LABEL: @check_be_vec_sldwi_va_vb_1 +; CHECK-BE: xxsldwi 34, 34, 35, 1 +; CHECK-BE: blr +} + +define <4 x i32> 
@check_be_vec_sldwi_va_vb_2(<4 x i32> %VA, <4 x i32> %VB) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> + ret <4 x i32> %0 +; CHECK-BE-LABEL: @check_be_vec_sldwi_va_vb_2 +; CHECK-BE: xxsldwi 34, 34, 35, 2 +; CHECK-BE: blr +} + +define <4 x i32> @check_be_vec_sldwi_va_vb_3(<4 x i32> %VA, <4 x i32> %VB) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> + ret <4 x i32> %0 +; CHECK-BE-LABEL: @check_be_vec_sldwi_va_vb_3 +; CHECK-BE: xxsldwi 34, 34, 35, 3 +; CHECK-BE: blr +} + +define <4 x i32> @check_be_swap_vec_sldwi_va_vb_0(<4 x i32> %VA, <4 x i32> %VB) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> + ret <4 x i32> %0 +; CHECK-BE-LABEL: @check_be_swap_vec_sldwi_va_vb_0 +; CHECK-BE: vmr 2, 3 +; CHECK-BE: blr +} + +define <4 x i32> @check_be_swap_vec_sldwi_va_vb_1(<4 x i32> %VA, <4 x i32> %VB) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> + ret <4 x i32> %0 +; CHECK-BE-LABEL: @check_be_swap_vec_sldwi_va_vb_1 +; CHECK-BE: xxsldwi 34, 35, 34, 1 +; CHECK-BE: blr +} + +define <4 x i32> @check_be_swap_vec_sldwi_va_vb_2(<4 x i32> %VA, <4 x i32> %VB) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> + ret <4 x i32> %0 +; CHECK-BE-LABEL: @check_be_swap_vec_sldwi_va_vb_2 +; CHECK-BE: xxsldwi 34, 35, 34, 2 +; CHECK-BE: blr +} + +define <4 x i32> @check_be_swap_vec_sldwi_va_vb_3(<4 x i32> %VA, <4 x i32> %VB) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> + ret <4 x i32> %0 +; CHECK-BE-LABEL: @check_be_swap_vec_sldwi_va_vb_3 +; CHECK-BE: xxsldwi 34, 35, 34, 3 +; CHECK-BE: blr +} + +define <4 x i32> @check_be_vec_sldwi_va_undef_0(<4 x i32> %VA) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> undef, <4 x i32> + ret <4 x i32> %0 +; CHECK-BE-LABEL: @check_be_vec_sldwi_va_undef_0 +; CHECK-BE: blr +} + +define <4 x i32> @check_be_vec_sldwi_va_undef_1(<4 x i32> %VA) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> undef, <4 x i32> + ret 
<4 x i32> %0 +; CHECK-BE-LABEL: @check_be_vec_sldwi_va_undef_1 +; CHECK-BE: xxsldwi 34, 34, 34, 1 +; CHECK-BE: blr +} + +define <4 x i32> @check_be_vec_sldwi_va_undef_2(<4 x i32> %VA) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> undef, <4 x i32> + ret <4 x i32> %0 +; CHECK-BE-LABEL: @check_be_vec_sldwi_va_undef_2 +; CHECK-BE: xxswapd 34, 34 +; CHECK-BE: blr +} + +define <4 x i32> @check_be_vec_sldwi_va_undef_3(<4 x i32> %VA) { +entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> undef, <4 x i32> + ret <4 x i32> %0 +; CHECK-BE-LABEL: @check_be_vec_sldwi_va_undef_3 +; CHECK-BE: xxsldwi 34, 34, 34, 3 +; CHECK-BE: blr +} + +; More test cases to test different types of vector inputs +define <16 x i8> @test_le_vec_sldwi_v16i8_v16i8(<16 x i8> %VA, <16 x i8> %VB) { + entry: + %0 = shufflevector <16 x i8> %VA, <16 x i8> %VB,<16 x i32> + ret <16 x i8> %0 +; CHECK-LE-LABEL: @test_le_vec_sldwi_v16i8_v16i8 +; CHECK-LE: xxsldwi 34, 34, 35, 1 +; CHECK-LE: blr +} + +define <8 x i16> @test_le_vec_sldwi_v8i16_v8i16(<8 x i16> %VA, <8 x i16> %VB) { + entry: + %0 = shufflevector <8 x i16> %VA, <8 x i16> %VB,<8 x i32> + ret <8 x i16> %0 +; CHECK-LE-LABEL: @test_le_vec_sldwi_v8i16_v8i16 +; CHECK-LE: xxsldwi 34, 34, 35, 1 +; CHECK-LE: blr +} + +; Note here xxpermdi 34, 34, 35, 2 <=> xxsldwi 34, 34, 35, 2 +define <2 x i64> @test_be_vec_sldwi_v2i64_v2i64(<2 x i64> %VA, <2 x i64> %VB) { + entry: + %0 = shufflevector <2 x i64> %VA, <2 x i64> %VB,<2 x i32> + ret <2 x i64> %0 +; CHECK-LE-LABEL: @test_be_vec_sldwi_v2i64_v2i64 +; CHECK-LE: xxpermdi 34, 34, 35, 2 +; CHECK-LE: blr +}