Index: lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- lib/Target/PowerPC/PPCISelLowering.h +++ lib/Target/PowerPC/PPCISelLowering.h @@ -90,6 +90,10 @@ /// VECSHL, + /// XXPERMDI - The PPC XXPERMDI instruction + /// + XXPERMDI, + /// The CMPB instruction (takes two operands of i32 or i64). CMPB, @@ -450,6 +454,12 @@ /// for a XXSLDWI instruction. bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE); + + /// isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable + /// for a XXPERMDI instruction. On success DM receives the DM immediate and + /// Swap tells whether the two shuffle inputs must be exchanged. + bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM, + bool &Swap, bool IsLE); /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the /// shift amount, otherwise return -1. Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -1092,6 +1092,7 @@ case PPCISD::VPERM: return "PPCISD::VPERM"; case PPCISD::XXSPLT: return "PPCISD::XXSPLT"; case PPCISD::XXINSERT: return "PPCISD::XXINSERT"; + case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI"; case PPCISD::VECSHL: return "PPCISD::VECSHL"; case PPCISD::CMPB: return "PPCISD::CMPB"; case PPCISD::Hi: return "PPCISD::Hi"; @@ -1664,6 +1665,24 @@ return false; } +// Check that each half of the mask is one aligned run of 8 consecutive bytes +static bool isDoubleWordShuffleMask(ShuffleVectorSDNode *N) { + unsigned B[8]; + for (unsigned i = 0; i < 2; ++i) { + B[0] = N->getMaskElt(i * 8); + if (B[0] % 8) + return false; + + for (unsigned int j = 1; j < 8; ++j) { + B[j] = N->getMaskElt(i * 8 + j); + if (B[j] != B[j-1] + 1) + return false; + } + } + + return true; +} + bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE) { if (N->getValueType(0) != MVT::v16i8) @@ -1720,6 +1739,63 @@ } } +// Calculate the third parameter of XXPERMDI, which is DM in the ISA +static unsigned getDMValue(unsigned M0, unsigned M1, bool IsLE) { + return IsLE ?
(((~M1) & 1) << 1) + ((~M0) & 1) : ((M0 << 1) + (M1 & 1)); +} + +/// isXXPERMDIShuffleMask - Return true if N can be lowered to XXPERMDI. DM gets +/// the immediate; Swap tells whether the two inputs must be exchanged first. +bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM, + bool &Swap, bool IsLE) { + if (N->getValueType(0) != MVT::v16i8) + return false; + + // Ensure each byte index of the double word is consecutive. + if (!isDoubleWordShuffleMask(N)) + return false; + + unsigned M0 = N->getMaskElt(0) / 8; // Double word chosen for result half 0. + unsigned M1 = N->getMaskElt(8) / 8; // Double word chosen for result half 1. + + // If both vector operands for the shuffle are the same vector, the mask will + // contain only elements from the first one and the second one will be undef. + if (N->getOperand(1).isUndef()) { + if ((M0 == 0 || M0 == 1) && (M1 == 0 || M1 == 1)) { + DM = getDMValue(M0, M1, IsLE); + Swap = false; + return true; + } else + return false; + } + + if (IsLE) { + if ((M0 == 3 || M0 == 2) && (M1 == 0 || M1 == 1)) { + Swap = false; + } else if ((M0 == 0 || M0 == 1) && (M1 == 2 || M1 == 3)) { + M0 = (M0 + 2) % 4; + M1 = (M1 + 2) % 4; + Swap = true; + } else + return false; + + DM = getDMValue(M0, M1, IsLE); + return true; + } else { // BE + if ((M0 == 0 || M0 == 1) && (M1 == 2 || M1 == 3)) { + Swap = false; + } else if ((M0 == 2 || M0 == 3) && (M1 == 0 || M1 == 1)) { + M0 = (M0 + 2) % 4; + M1 = (M1 + 2) % 4; + Swap = true; + } else + return false; + + DM = getDMValue(M0, M1, IsLE); + return true; + } +} + /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. @@ -7734,6 +7810,19 @@ return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl); } + if (Subtarget.hasVSX() && + PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) { + if (Swap) + std::swap(V1, V2); + SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); + SDValue Conv2 = + DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ?
V1 : V2); + /* ShiftElts carries the DM immediate set by isXXPERMDIShuffleMask. */ + SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v4i32, Conv1, Conv2, + DAG.getConstant(ShiftElts, dl, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI); + } + + if (Subtarget.hasVSX()) { if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) { int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG); Index: lib/Target/PowerPC/PPCInstrInfo.td =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.td +++ lib/Target/PowerPC/PPCInstrInfo.td @@ -53,6 +53,10 @@ SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3> ]>; +def SDT_PPCxxpermdi: SDTypeProfile<1, 3, [ SDTCisVec<0>, + SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3> +]>; + def SDT_PPCvcmp : SDTypeProfile<1, 3, [ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32> ]>; @@ -170,6 +174,7 @@ def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>; def PPCxxsplt : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>; def PPCxxinsert : SDNode<"PPCISD::XXINSERT", SDT_PPCVecInsert, []>; +def PPCxxpermdi : SDNode<"PPCISD::XXPERMDI", SDT_PPCxxpermdi, []>; def PPCvecshl : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>; def PPCqvfperm : SDNode<"PPCISD::QVFPERM", SDT_PPCqvfperm, []>; Index: lib/Target/PowerPC/PPCInstrVSX.td =================================================================== --- lib/Target/PowerPC/PPCInstrVSX.td +++ lib/Target/PowerPC/PPCInstrVSX.td @@ -843,7 +843,9 @@ def XXPERMDI : XX3Form_2<60, 10, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$DM), - "xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm, []>; + "xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm, + [(set v4i32:$XT, (PPCxxpermdi v4i32:$XA, v4i32:$XB, + imm32SExt16:$DM))]>; let isCodeGenOnly = 1 in def XXPERMDIs : XX3Form_2s<60, 10, (outs vsrc:$XT), (ins vsfrc:$XA, u2imm:$DM), "xxpermdi $XT, $XA, $XA, $DM", IIC_VecPerm, []>; Index: test/CodeGen/PowerPC/vec_xxpermdi.ll =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/vec_xxpermdi.ll @@ -0,0
+1,307 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | \ +; RUN: FileCheck %s -check-prefix=CHECK-LE +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | \ +; RUN: FileCheck %s -check-prefix=CHECK-BE + +; Possible LE ShuffleVector masks (Case 1): +; ShuffleVector((vector double)a, (vector double)b, 3, 1) +; ShuffleVector((vector double)a, (vector double)b, 2, 1) +; ShuffleVector((vector double)a, (vector double)b, 3, 0) +; ShuffleVector((vector double)a, (vector double)b, 2, 0) +; which targets at: +; xxpermdi a, b, 0 +; xxpermdi a, b, 1 +; xxpermdi a, b, 2 +; xxpermdi a, b, 3 +; Possible LE Swap ShuffleVector masks (Case 2): +; ShuffleVector((vector double)a, (vector double)b, 1, 3) +; ShuffleVector((vector double)a, (vector double)b, 0, 3) +; ShuffleVector((vector double)a, (vector double)b, 1, 2) +; ShuffleVector((vector double)a, (vector double)b, 0, 2) +; which targets at: +; xxpermdi b, a, 0 +; xxpermdi b, a, 1 +; xxpermdi b, a, 2 +; xxpermdi b, a, 3 +; Possible LE ShuffleVector masks when a == b, b is undef (Case 3): +; ShuffleVector((vector double)a, (vector double)a, 1, 1) +; ShuffleVector((vector double)a, (vector double)a, 0, 1) +; ShuffleVector((vector double)a, (vector double)a, 1, 0) +; ShuffleVector((vector double)a, (vector double)a, 0, 0) +; which targets at: +; xxpermdi a, a, 0 +; xxpermdi a, a, 1 +; xxpermdi a, a, 2 +; xxpermdi a, a, 3 + +; Possible BE ShuffleVector masks (Case 4): +; ShuffleVector((vector double)a, (vector double)b, 0, 2) +; ShuffleVector((vector double)a, (vector double)b, 0, 3) +; ShuffleVector((vector double)a, (vector double)b, 1, 2) +; ShuffleVector((vector double)a, (vector double)b, 1, 3) +; which targets at: +; xxpermdi a, b, 0 +; xxpermdi a, b, 1 +; xxpermdi a, b, 2 +; xxpermdi a, b, 3 +; Possible BE Swap ShuffleVector masks (Case 5): +; ShuffleVector((vector double)a, (vector double)b, 2, 0) +; ShuffleVector((vector double)a, (vector 
double)b, 3, 0) +; ShuffleVector((vector double)a, (vector double)b, 2, 1) +; ShuffleVector((vector double)a, (vector double)b, 3, 1) +; which targets at: +; xxpermdi b, a, 0 +; xxpermdi b, a, 1 +; xxpermdi b, a, 2 +; xxpermdi b, a, 3 +; Possible BE ShuffleVector masks when a == b, b is undef (Case 6): +; ShuffleVector((vector double)a, (vector double)a, 0, 0) +; ShuffleVector((vector double)a, (vector double)a, 0, 1) +; ShuffleVector((vector double)a, (vector double)a, 1, 0) +; ShuffleVector((vector double)a, (vector double)a, 1, 1) +; which targets at: +; xxpermdi a, a, 0 +; xxpermdi a, a, 1 +; xxpermdi a, a, 2 +; xxpermdi a, a, 3 + +define <2 x double> @test_le_vec_xxpermdi_v2f64_v2f64_0(<2 x double> %VA, <2 x double> %VB) { + entry: + %0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 3, i32 1> + ret <2 x double> %0 +; CHECK-LE-LABEL: @test_le_vec_xxpermdi_v2f64_v2f64_0 +; CHECK-LE: xxmrghd 34, 34, 35 +; CHECK-LE: blr +} + +define <2 x double> @test_le_vec_xxpermdi_v2f64_v2f64_1(<2 x double> %VA, <2 x double> %VB) { + entry: + %0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 2, i32 1> + ret <2 x double> %0 +; CHECK-LE-LABEL: @test_le_vec_xxpermdi_v2f64_v2f64_1 +; CHECK-LE: xxpermdi 34, 34, 35, 1 +; CHECK-LE: blr +} + +define <2 x double> @test_le_vec_xxpermdi_v2f64_v2f64_2(<2 x double> %VA, <2 x double> %VB) { + entry: + %0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 3, i32 0> + ret <2 x double> %0 +; CHECK-LE-LABEL: @test_le_vec_xxpermdi_v2f64_v2f64_2 +; CHECK-LE: xxpermdi 34, 34, 35, 2 +; CHECK-LE: blr +} + +define <2 x double> @test_le_vec_xxpermdi_v2f64_v2f64_3(<2 x double> %VA, <2 x double> %VB) { + entry: + %0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 2, i32 0> + ret <2 x double> %0 +; CHECK-LE-LABEL: @test_le_vec_xxpermdi_v2f64_v2f64_3 +; CHECK-LE: xxmrgld 34, 34, 35 +; CHECK-LE: blr +} + +define <2 x double> @test_le_swap_vec_xxpermdi_v2f64_v2f64_0(<2 x double> %VA, <2 x double> %VB) { + entry: + %0 = shufflevector <2 x double>
%VA, <2 x double> %VB,<2 x i32> <i32 1, i32 3> + ret <2 x double> %0 +; CHECK-LE-LABEL: @test_le_swap_vec_xxpermdi_v2f64_v2f64_0 +; CHECK-LE: xxmrghd 34, 35, 34 +; CHECK-LE: blr +} + +define <2 x double> @test_le_swap_vec_xxpermdi_v2f64_v2f64_1(<2 x double> %VA, <2 x double> %VB) { + entry: + %0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 0, i32 3> + ret <2 x double> %0 +; CHECK-LE-LABEL: @test_le_swap_vec_xxpermdi_v2f64_v2f64_1 +; CHECK-LE: xxpermdi 34, 35, 34, 1 +; CHECK-LE: blr +} + +define <2 x double> @test_le_swap_vec_xxpermdi_v2f64_v2f64_2(<2 x double> %VA, <2 x double> %VB) { + entry: + %0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 1, i32 2> + ret <2 x double> %0 +; CHECK-LE-LABEL: @test_le_swap_vec_xxpermdi_v2f64_v2f64_2 +; CHECK-LE: xxpermdi 34, 35, 34, 2 +; CHECK-LE: blr +} + +define <2 x double> @test_le_swap_vec_xxpermdi_v2f64_v2f64_3(<2 x double> %VA, <2 x double> %VB) { + entry: + %0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 0, i32 2> + ret <2 x double> %0 +; CHECK-LE-LABEL: @test_le_swap_vec_xxpermdi_v2f64_v2f64_3 +; CHECK-LE: xxmrgld 34, 35, 34 +; CHECK-LE: blr +} + +define <2 x double> @test_le_vec_xxpermdi_v2f64_undef_0(<2 x double> %VA) { + entry: + %0 = shufflevector <2 x double> %VA, <2 x double> undef, <2 x i32> <i32 1, i32 1> + ret <2 x double> %0 +; CHECK-LE-LABEL: @test_le_vec_xxpermdi_v2f64_undef_0 +; CHECK-LE: xxspltd 34, 34, 0 +; CHECK-LE: blr +} + +define <2 x double> @test_le_vec_xxpermdi_v2f64_undef_1(<2 x double> %VA) { + entry: + %0 = shufflevector <2 x double> %VA, <2 x double> undef, <2 x i32> <i32 0, i32 1> + ret <2 x double> %0 +; CHECK-LE-LABEL: @test_le_vec_xxpermdi_v2f64_undef_1 +; CHECK-LE: blr +} + +define <2 x double> @test_le_vec_xxpermdi_v2f64_undef_2(<2 x double> %VA) { + entry: + %0 = shufflevector <2 x double> %VA, <2 x double> undef, <2 x i32> <i32 1, i32 0> + ret <2 x double> %0 +; CHECK-LE-LABEL: @test_le_vec_xxpermdi_v2f64_undef_2 +; CHECK-LE: xxswapd 34, 34 +} + +define <2 x double> @test_le_vec_xxpermdi_v2f64_undef_3(<2 x double> %VA) { + entry:
+ %0 = shufflevector <2 x double> %VA, <2 x double> undef, <2 x i32> <i32 0, i32 0> + ret <2 x double> %0 +; CHECK-LE-LABEL: @test_le_vec_xxpermdi_v2f64_undef_3 +; CHECK-LE: xxspltd 34, 34, 1 +; CHECK-LE: blr +} + +; Start testing BE +define <2 x double> @test_be_vec_xxpermdi_v2f64_v2f64_0(<2 x double> %VA, <2 x double> %VB) { + entry: + %0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 0, i32 2> + ret <2 x double> %0 +; CHECK-BE-LABEL: @test_be_vec_xxpermdi_v2f64_v2f64_0 +; CHECK-BE: xxmrghd 34, 34, 35 +; CHECK-BE: blr +} + +define <2 x double> @test_be_vec_xxpermdi_v2f64_v2f64_1(<2 x double> %VA, <2 x double> %VB) { + entry: + %0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 0, i32 3> + ret <2 x double> %0 +; CHECK-BE-LABEL: @test_be_vec_xxpermdi_v2f64_v2f64_1 +; CHECK-BE: xxpermdi 34, 34, 35, 1 +; CHECK-BE: blr +} + +define <2 x double> @test_be_vec_xxpermdi_v2f64_v2f64_2(<2 x double> %VA, <2 x double> %VB) { + entry: + %0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 1, i32 2> + ret <2 x double> %0 +; CHECK-BE-LABEL: @test_be_vec_xxpermdi_v2f64_v2f64_2 +; CHECK-BE: xxpermdi 34, 34, 35, 2 +; CHECK-BE: blr +} + +define <2 x double> @test_be_vec_xxpermdi_v2f64_v2f64_3(<2 x double> %VA, <2 x double> %VB) { + entry: + %0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 1, i32 3> + ret <2 x double> %0 +; CHECK-BE-LABEL: @test_be_vec_xxpermdi_v2f64_v2f64_3 +; CHECK-BE: xxmrgld 34, 34, 35 +; CHECK-BE: blr +} + +define <2 x double> @test_be_swap_vec_xxpermdi_v2f64_v2f64_0(<2 x double> %VA, <2 x double> %VB) { + entry: + %0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 2, i32 0> + ret <2 x double> %0 +; CHECK-BE-LABEL: @test_be_swap_vec_xxpermdi_v2f64_v2f64_0 +; CHECK-BE: xxmrghd 34, 35, 34 +; CHECK-BE: blr +} + +define <2 x double> @test_be_swap_vec_xxpermdi_v2f64_v2f64_1(<2 x double> %VA, <2 x double> %VB) { + entry: + %0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 3, i32 0> + ret <2 x double> %0 +; CHECK-BE-LABEL: @test_be_swap_vec_xxpermdi_v2f64_v2f64_1
+; CHECK-BE: xxpermdi 34, 35, 34, 1 +; CHECK-BE: blr +} + +define <2 x double> @test_be_swap_vec_xxpermdi_v2f64_v2f64_2(<2 x double> %VA, <2 x double> %VB) { + entry: + %0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 2, i32 1> + ret <2 x double> %0 +; CHECK-BE-LABEL: @test_be_swap_vec_xxpermdi_v2f64_v2f64_2 +; CHECK-BE: xxpermdi 34, 35, 34, 2 +; CHECK-BE: blr +} + +define <2 x double> @test_be_swap_vec_xxpermdi_v2f64_v2f64_3(<2 x double> %VA, <2 x double> %VB) { + entry: + %0 = shufflevector <2 x double> %VA, <2 x double> %VB,<2 x i32> <i32 3, i32 1> + ret <2 x double> %0 +; CHECK-BE-LABEL: @test_be_swap_vec_xxpermdi_v2f64_v2f64_3 +; CHECK-BE: xxmrgld 34, 35, 34 +; CHECK-BE: blr +} + +define <2 x double> @test_be_vec_xxpermdi_v2f64_undef_0(<2 x double> %VA) { + entry: + %0 = shufflevector <2 x double> %VA, <2 x double> undef, <2 x i32> <i32 0, i32 0> + ret <2 x double> %0 +; CHECK-BE-LABEL: @test_be_vec_xxpermdi_v2f64_undef_0 +; CHECK-BE: xxspltd 34, 34, 0 +; CHECK-BE: blr +} + +define <2 x double> @test_be_vec_xxpermdi_v2f64_undef_1(<2 x double> %VA) { + entry: + %0 = shufflevector <2 x double> %VA, <2 x double> undef, <2 x i32> <i32 0, i32 1> + ret <2 x double> %0 +; CHECK-BE-LABEL: @test_be_vec_xxpermdi_v2f64_undef_1 +; CHECK-BE: blr +} + +define <2 x double> @test_be_vec_xxpermdi_v2f64_undef_2(<2 x double> %VA) { + entry: + %0 = shufflevector <2 x double> %VA, <2 x double> undef, <2 x i32> <i32 1, i32 0> + ret <2 x double> %0 +; CHECK-BE-LABEL: @test_be_vec_xxpermdi_v2f64_undef_2 +; CHECK-BE: xxswapd 34, 34 +} + +define <2 x double> @test_be_vec_xxpermdi_v2f64_undef_3(<2 x double> %VA) { + entry: + %0 = shufflevector <2 x double> %VA, <2 x double> undef, <2 x i32> <i32 1, i32 1> + ret <2 x double> %0 +; CHECK-BE-LABEL: @test_be_vec_xxpermdi_v2f64_undef_3 +; CHECK-BE: xxspltd 34, 34, 1 +; CHECK-BE: blr +} + +; More test cases to test different types of vector inputs +define <16 x i8> @test_be_vec_xxpermdi_v16i8_v16i8(<16 x i8> %VA, <16 x i8> %VB) { + entry: + %0 = shufflevector <16 x i8> %VA, <16 x i8> %VB,<16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> + ret <16 x
i8> %0 +; CHECK-BE-LABEL: @test_be_vec_xxpermdi_v16i8_v16i8 +; CHECK-BE: xxpermdi 34, 34, 35, 1 +; CHECK-BE: blr +} + +define <8 x i16> @test_le_swap_vec_xxpermdi_v8i16_v8i16(<8 x i16> %VA, <8 x i16> %VB) { + entry: + %0 = shufflevector <8 x i16> %VA, <8 x i16> %VB,<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15> + ret <8 x i16> %0 +; CHECK-LE-LABEL: @test_le_swap_vec_xxpermdi_v8i16_v8i16 +; CHECK-LE: xxpermdi 34, 35, 34, 1 +; CHECK-LE: blr +} + +define <4 x i32> @test_le_swap_vec_xxpermdi_v4i32_v4i32(<4 x i32> %VA, <4 x i32> %VB) { + entry: + %0 = shufflevector <4 x i32> %VA, <4 x i32> %VB,<4 x i32> <i32 0, i32 1, i32 6, i32 7> + ret <4 x i32> %0 +; CHECK-LE-LABEL: @test_le_swap_vec_xxpermdi_v4i32_v4i32 +; CHECK-LE: xxpermdi 34, 35, 34, 1 +; CHECK-LE: blr +}