Index: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h +++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h @@ -86,6 +86,10 @@ /// XXINSERT, + /// XXREVERSE - The PPC VSX reverse instruction + /// + XXREVERSE, + /// VECSHL - The PPC VSX shift left instruction /// VECSHL, @@ -458,6 +462,23 @@ /// for a XXSLDWI instruction. bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE); + + /// isXXBRHShuffleMask - Return true if this is a shuffle mask suitable + /// for a XXBRH instruction. + bool isXXBRHShuffleMask(ShuffleVectorSDNode *N); + + /// isXXBRWShuffleMask - Return true if this is a shuffle mask suitable + /// for a XXBRW instruction. + bool isXXBRWShuffleMask(ShuffleVectorSDNode *N); + + /// isXXBRDShuffleMask - Return true if this is a shuffle mask suitable + /// for a XXBRD instruction. + bool isXXBRDShuffleMask(ShuffleVectorSDNode *N); + + /// isXXBRQShuffleMask - Return true if this is a shuffle mask suitable + /// for a XXBRQ instruction. + bool isXXBRQShuffleMask(ShuffleVectorSDNode *N); + /// isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable /// for a XXPERMDI instruction. bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, Index: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp +++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1128,6 +1128,7 @@ case PPCISD::VPERM: return "PPCISD::VPERM"; case PPCISD::XXSPLT: return "PPCISD::XXSPLT"; case PPCISD::XXINSERT: return "PPCISD::XXINSERT"; + case PPCISD::XXREVERSE: return "PPCISD::XXREVERSE"; case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI"; case PPCISD::VECSHL: return "PPCISD::VECSHL"; case PPCISD::CMPB: return "PPCISD::CMPB"; @@ -1610,22 +1611,34 @@ return true; } -// Check that the mask is shuffling N byte elements. -static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width) { +/// Check that the mask is shuffling N byte elements. Within each N byte +/// element of the mask, the indices could be either in increasing or +/// decreasing order as long as they are consecutive. +/// \param[in] N: the shuffle vector SD Node to analyze +/// \param[in] Width: the element width in bytes, could be 2/4/8/16 (HalfWord/ +/// Word/DoubleWord/QuadWord). +/// \param[in] StepLen: the delta indices number among the N byte element, if +/// the mask is in increasing/decreasing order then it is 1/-1. +/// \return true iff the mask is shuffling N byte elements. +static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width, + int StepLen) { assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) && "Unexpected element width."); + assert((StepLen == 1 || StepLen == -1) && "Unexpected element width."); unsigned NumOfElem = 16 / Width; unsigned MaskVal[16]; // Width is never greater than 16 for (unsigned i = 0; i < NumOfElem; ++i) { MaskVal[0] = N->getMaskElt(i * Width); - if (MaskVal[0] % Width) { + if ((StepLen == 1) && (MaskVal[0] % Width)) { + return false; + } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) { return false; } for (unsigned int j = 1; j < Width; ++j) { MaskVal[j] = N->getMaskElt(i * Width + j); - if (MaskVal[j] != MaskVal[j-1] + 1) { + if (MaskVal[j] != MaskVal[j-1] + StepLen) { return false; } } @@ -1636,7 +1649,7 @@ bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE) { - if (!isNByteElemShuffleMask(N, 4)) + if (!isNByteElemShuffleMask(N, 4, 1)) return false; // Now we look at mask elements 0,4,8,12 @@ -1713,7 +1726,7 @@ bool &Swap, bool IsLE) { assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); // Ensure each byte index of the word is consecutive. - if (!isNByteElemShuffleMask(N, 4)) + if (!isNByteElemShuffleMask(N, 4, 1)) return false; // Now we look at mask elements 0,4,8,12, which are the beginning of words. @@ -1771,6 +1784,35 @@ } } +bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) { + assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); + + if (!isNByteElemShuffleMask(N, Width, -1)) + return false; + + for (int i = 0; i < 16; i += Width) + if (N->getMaskElt(i) != i + Width - 1) + return false; + + return true; +} + +bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) { + return isXXBRShuffleMaskHelper(N, 2); +} + +bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) { + return isXXBRShuffleMaskHelper(N, 4); +} + +bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) { + return isXXBRShuffleMaskHelper(N, 8); +} + +bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) { + return isXXBRShuffleMaskHelper(N, 16); +} + /// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap /// if the inputs to the instruction should be swapped and set \p DM to the /// value for the immediate. @@ -1784,7 +1826,7 @@ assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); // Ensure each byte index of the double word is consecutive. - if (!isNByteElemShuffleMask(N, 8)) + if (!isNByteElemShuffleMask(N, 8, 1)) return false; unsigned M0 = N->getMaskElt(0) / 8; @@ -7859,6 +7901,26 @@ return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI); } + if (Subtarget.hasP9Vector()) { + if (PPC::isXXBRHShuffleMask(SVOp)) { + SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1); + SDValue ReveHWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v8i16, Conv); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord); + } else if (PPC::isXXBRWShuffleMask(SVOp)) { + SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); + SDValue ReveWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v4i32, Conv); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord); + } else if (PPC::isXXBRDShuffleMask(SVOp)) { + SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1); + SDValue ReveDWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Conv); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord); + } else if (PPC::isXXBRQShuffleMask(SVOp)) { + SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1); + SDValue ReveQWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v1i128, Conv); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord); + } + } + if (Subtarget.hasVSX()) { if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) { int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG); Index: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td +++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td @@ -53,6 +53,10 @@ SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3> ]>; +def SDT_PPCVecReverse: SDTypeProfile<1, 1, [ SDTCisVec<0>, + SDTCisVec<1> +]>; + def SDT_PPCxxpermdi: SDTypeProfile<1, 3, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3> ]>; @@ -174,6 +178,7 @@ def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>; def PPCxxsplt : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>; def PPCxxinsert : SDNode<"PPCISD::XXINSERT", SDT_PPCVecInsert, []>; +def PPCxxreverse : SDNode<"PPCISD::XXREVERSE", SDT_PPCVecReverse, []>; def PPCxxpermdi : SDNode<"PPCISD::XXPERMDI", SDT_PPCxxpermdi, []>; def PPCvecshl : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>; Index: llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td +++ llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td @@ -2340,6 +2340,16 @@ def XXBRD : XX2_XT6_XO5_XB6<60, 23, 475, "xxbrd", vsrc, []>; def XXBRQ : XX2_XT6_XO5_XB6<60, 31, 475, "xxbrq", vsrc, []>; + // Vector Reverse + def : Pat<(v8i16 (PPCxxreverse v8i16 :$A)), + (v8i16 (COPY_TO_REGCLASS (XXBRH (COPY_TO_REGCLASS $A, VSRC)), VRRC))>; + def : Pat<(v4i32 (PPCxxreverse v4i32 :$A)), + (v4i32 (XXBRW $A))>; + def : Pat<(v2i64 (PPCxxreverse v2i64 :$A)), + (v2i64 (XXBRD $A))>; + def : Pat<(v1i128 (PPCxxreverse v1i128 :$A)), + (v1i128 (COPY_TO_REGCLASS (XXBRQ (COPY_TO_REGCLASS $A, VSRC)), VRRC))>; + // Vector Permute def XXPERM : XX3_XT5_XA5_XB5<60, 26, "xxperm" , vsrc, vsrc, vsrc, IIC_VecPerm, []>; Index: llvm/trunk/test/CodeGen/PowerPC/vec_revb.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/vec_revb.ll +++ llvm/trunk/test/CodeGen/PowerPC/vec_revb.ll @@ -0,0 +1,54 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s + +define <8 x i16> @testXXBRH(<8 x i16> %a) { +; CHECK-LABEL: testXXBRH: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: xxbrh 34, 34 +; CHECK-NEXT: blr + +entry: + %0 = bitcast <8 x i16> %a to <16 x i8> + %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> + %2 = bitcast <16 x i8> %1 to <8 x i16> + ret <8 x i16> %2 +} + +define <4 x i32> @testXXBRW(<4 x i32> %a) { +; CHECK-LABEL: testXXBRW: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: xxbrw 34, 34 +; CHECK-NEXT: blr + +entry: + %0 = bitcast <4 x i32> %a to <16 x i8> + %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> + %2 = bitcast <16 x i8> %1 to <4 x i32> + ret <4 x i32> %2 +} + +define <2 x double> @testXXBRD(<2 x double> %a) { +; CHECK-LABEL: testXXBRD: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: xxbrd 34, 34 +; CHECK-NEXT: blr + +entry: + %0 = bitcast <2 x double> %a to <16 x i8> + %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> + %2 = bitcast <16 x i8> %1 to <2 x double> + ret <2 x double> %2 +} + +define <1 x i128> @testXXBRQ(<1 x i128> %a) { +; CHECK-LABEL: testXXBRQ: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: xxbrq 34, 34 +; CHECK-NEXT: blr + +entry: + %0 = bitcast <1 x i128> %a to <16 x i8> + %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> + %2 = bitcast <16 x i8> %1 to <1 x i128> + ret <1 x i128> %2 +}