Index: lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.h
+++ lib/Target/PowerPC/PPCISelLowering.h
@@ -67,6 +67,10 @@
       /// VSFRC that is sign-extended from ByteWidth to a 64-byte integer.
       VEXTS,
 
+      /// SExtVElems, takes an input vector of a smaller type and sign extends
+      /// to an output vector of a larger type.
+      SExtVElems,
+
       /// Reciprocal estimate instructions (unary FP ops).
       FRE, FRSQRTE,
 
Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1155,6 +1155,7 @@
   case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
   case PPCISD::STXSIX: return "PPCISD::STXSIX";
   case PPCISD::VEXTS: return "PPCISD::VEXTS";
+  case PPCISD::SExtVElems: return "PPCISD::SExtVElems";
   case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
   case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
   case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
@@ -11220,6 +11221,141 @@
   return SDValue();
 }
 
+// This function returns the correct index for the TargetElems array
+// based on the input and output of the vector extension.
+static int getLookUpIndex(SDValue Input, SDNode *N) {
+  // Only the scalar element types of the source and of the result matter.
+  const EVT InTy = Input.getValueType().getVectorElementType();
+  const EVT OutTy = N->getValueType(0).getVectorElementType();
+  if (InTy == MVT::i8 && OutTy == MVT::i32)
+    return 0; // b->w
+  if (InTy == MVT::i8 && OutTy == MVT::i64)
+    return 1; // b->d
+  if (InTy == MVT::i16 && OutTy == MVT::i32)
+    return 2; // h->w
+  if (InTy == MVT::i16 && OutTy == MVT::i64)
+    return 3; // h->d
+  if (InTy == MVT::i32 && OutTy == MVT::i64)
+    return 4; // w->d
+  // NOTE: every other type pair also yields 0 (the b->w entry), so callers
+  // must only pass one of the five combinations above.
+  return 0;
+}
+
+// This array encodes the indices that the vector sign extend instructions
+// extract from when extending from one type to another for both BE and LE.
+// The right nibble of each byte corresponds to the LE indices
+// and the left nibble of each byte corresponds to the BE indices.
+// For example: 0x3074B8FC byte->word
+//   For LE: the allowed indices are: 0x0,0x4,0x8,0xC
+//   For BE: the allowed indices are: 0x3,0x7,0xB,0xF
+// For example: 0x000070F8 byte->double word
+//   For LE: the allowed indices are: 0x0,0x8
+//   For BE: the allowed indices are: 0x7,0xF
+static const uint64_t TargetElems[] = {
+    0x3074B8FC, // b->w
+    0x000070F8, // b->d
+    0x10325476, // h->w
+    0x00003074, // h->d
+    0x00001032, // w->d
+};
+
+// This function adds the required vector_shuffle needed to get
+// the elements of the vector extract in the correct position
+// as specified by the TargetElems array.
+static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
+                                      uint64_t Elems, SDValue Input) {
+  SDLoc dl(N);
+  const EVT InVT = Input.getValueType();
+  // Little endian uses the low nibble of every byte, big endian the high one.
+  const unsigned Shift = DAG.getDataLayout().isLittleEndian() ? 0 : 4;
+
+  // Start from an all-undef mask; only the lanes the extend reads get set.
+  SmallVector<int, 16> ShuffleMask(InVT.getVectorNumElements(), -1);
+
+  // Walk Elems and the expected encoding one byte (one build_vector operand)
+  // at a time, starting with the last operand, which sits in the low byte.
+  uint64_t CorrectElems = TargetElems[getLookUpIndex(Input, N)];
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    ShuffleMask[(CorrectElems >> Shift) & 0xF] = (Elems >> Shift) & 0xF;
+    CorrectElems >>= 8;
+    Elems >>= 8;
+  }
+
+  SDValue Shuffle =
+      DAG.getVectorShuffle(InVT, dl, Input, DAG.getUNDEF(InVT), ShuffleMask);
+
+  // Sign extend the elements that are now in the positions the
+  // instruction reads from.
+  return DAG.getNode(PPCISD::SExtVElems, dl, N->getValueType(0), Shuffle);
+}
+
+// Look for build vector patterns where input operands come from sign
+// extended vector_extract elements of specific indices. If the correct
+// indices aren't used, add a vector shuffle to fix up the indices and create
+// a new PPCISD::SExtVElems node which selects the new vector sign extend
+// instructions during instruction selection.
+// Extending byte to word:
+//   LE indices: 0,4,8,12. BE indices: 3,7,11,15
+// Extending byte to double word:
+//   LE indices: 0,8. BE indices: 7, 15
+// Extending half word to word:
+//   LE indices: 0,2,4,6. BE indices: 1,3,5,7
+// Extending half word to double word:
+//   LE indices: 0,4. BE indices: 3,7
+// Extending word to double word:
+//   LE indices: 0,2.
BE indices: 1,3
+static SDValue combineBVOfVecExtend(SDNode *N, SelectionDAG &DAG) {
+  const bool IsLE = DAG.getDataLayout().isLittleEndian();
+  // One byte per operand: low nibble on LE, high nibble on BE.
+  uint64_t Elems = 0;
+  SDValue Input;
+
+  // Matches (sign_extend (extract_vector_elt Input, Cst)) and appends the
+  // encoded extract index to Elems.
+  auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
+    if (!Op || Op.getOpcode() != ISD::SIGN_EXTEND)
+      return false;
+    SDValue Extract = Op.getOperand(0);
+    if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+      return false;
+    SDValue Src = Extract.getOperand(0);
+    auto *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
+    // A widening extract leaves the extra high bits unspecified; skip it.
+    if (!ExtOp ||
+        Extract.getValueType() != Src.getValueType().getVectorElementType())
+      return false;
+
+    // All operands have to extract from the same source vector.
+    if (Input && Input != Src)
+      return false;
+    Input = Src;
+
+    const uint64_t Index = ExtOp->getZExtValue();
+    Elems = (Elems << 8) | (IsLE ? Index : Index << 4);
+    return true;
+  };
+
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+    if (!isSExtOfVecExtract(N->getOperand(i)))
+      return SDValue();
+
+  // Only these vector types have PPCISD::SExtVElems selection patterns.
+  const EVT InVT = Input.getValueType();
+  const EVT OutVT = N->getValueType(0);
+  if ((OutVT != MVT::v4i32 && OutVT != MVT::v2i64) ||
+      (InVT != MVT::v16i8 && InVT != MVT::v8i16 && InVT != MVT::v4i32))
+    return SDValue();
+
+  // Indices that already match this endianness are left to the build_vector
+  // patterns; anything else needs a vector_shuffle first.
+  uint64_t Want = TargetElems[getLookUpIndex(Input, N)];
+  Want &= IsLE ? 0x0F0F0F0F0F0F0F0FULL : 0xF0F0F0F0F0F0F0F0ULL;
+  if (Elems != Want)
+    return addShuffleForVecExtend(N, DAG, Elems, Input);
+  return SDValue();
+}
+
 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
@@ -11247,6 +11383,15 @@
   if (Reduced)
     return Reduced;
 
+  // If we're building a vector out of extended elements from another vector
+  // we have P9 vector integer extend instructions.
+  if (Subtarget.hasP9Altivec()) {
+    Reduced = combineBVOfVecExtend(N, DAG);
+    if (Reduced)
+      return Reduced;
+  }
+
+
   if (N->getValueType(0) != MVT::v2f64)
     return SDValue();
 
Index: lib/Target/PowerPC/PPCInstrInfo.td
===================================================================
--- lib/Target/PowerPC/PPCInstrInfo.td
+++ lib/Target/PowerPC/PPCInstrInfo.td
@@ -32,6 +32,9 @@
 def SDT_PPCVexts : SDTypeProfile<1, 2, [
   SDTCisVT<0, f64>, SDTCisVT<1, f64>, SDTCisPtrTy<2>
 ]>;
+def SDT_PPCSExtVElems : SDTypeProfile<1, 1, [ // One result, one operand.
+  SDTCisVec<0>, SDTCisVec<1>                  // Both of them are vectors.
+]>;
 
 def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>,
                                            SDTCisVT<1, i32> ]>;
@@ -127,6 +130,7 @@
 def PPCstxsix : SDNode<"PPCISD::STXSIX", SDT_PPCstxsix,
                        [SDNPHasChain, SDNPMayStore]>;
 def PPCVexts : SDNode<"PPCISD::VEXTS", SDT_PPCVexts, []>;
+def PPCSExtVElems : SDNode<"PPCISD::SExtVElems", SDT_PPCSExtVElems, []>;
 
 // Extract FPSCR (not modeled at the DAG level).
 def PPCmffs : SDNode<"PPCISD::MFFS",
Index: lib/Target/PowerPC/PPCInstrVSX.td
===================================================================
--- lib/Target/PowerPC/PPCInstrVSX.td
+++ lib/Target/PowerPC/PPCInstrVSX.td
@@ -2717,6 +2717,50 @@
   dag B0 = (f32 (fpround (f64 (extractelt v2f64:$B, 0))));
   dag B1 = (f32 (fpround (f64 (extractelt v2f64:$B, 1))));
 }
+
+def ByteToWord { // i8 -> i32 operands; matched to VEXTSB2W (LE_* / BE_*).
+  dag LE_A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 0)), i8));
+  dag LE_A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 4)), i8));
+  dag LE_A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 8)), i8));
+  dag LE_A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 12)), i8));
+  dag BE_A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 3)), i8));
+  dag BE_A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 7)), i8));
+  dag BE_A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 11)), i8));
+  dag BE_A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 15)), i8));
+}
+
+def ByteToDWord { // i8 -> i64 operands; matched to VEXTSB2D (LE_* / BE_*).
+  dag LE_A0 = (i64 (sext_inreg (i64 (anyext (i32 (vector_extract v16i8:$A, 0)))), i8));
+  dag LE_A1 = (i64 (sext_inreg (i64 (anyext (i32 (vector_extract v16i8:$A, 8)))), i8));
+  dag BE_A0 = (i64 (sext_inreg (i64 (anyext (i32 (vector_extract v16i8:$A, 7)))), i8));
+  dag BE_A1 = (i64 (sext_inreg (i64 (anyext (i32 (vector_extract v16i8:$A, 15)))), i8));
+}
+
+def HWordToWord { // i16 -> i32 operands; matched to VEXTSH2W (LE_* / BE_*).
+  dag LE_A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 0)), i16));
+  dag LE_A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 2)), i16));
+  dag LE_A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 4)), i16));
+  dag LE_A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 6)), i16));
+  dag BE_A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 1)), i16));
+  dag BE_A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 3)), i16));
+  dag BE_A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 5)), i16));
+  dag BE_A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 7)), i16));
+}
+
+def HWordToDWord { // i16 -> i64 operands; matched to VEXTSH2D (LE_* / BE_*).
+  dag LE_A0 = (i64 (sext_inreg (i64 (anyext (i32 (vector_extract v8i16:$A, 0)))), i16));
+  dag LE_A1 = (i64 (sext_inreg (i64 (anyext (i32 (vector_extract v8i16:$A, 4)))), i16));
+  dag BE_A0 = (i64 (sext_inreg (i64 (anyext (i32 (vector_extract v8i16:$A, 3)))), i16));
+  dag BE_A1 = (i64 (sext_inreg (i64 (anyext (i32 (vector_extract v8i16:$A, 7)))), i16));
+}
+
+def WordToDWord { // i32 -> i64 operands; matched to VEXTSW2D (LE_* / BE_*).
+  dag LE_A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 0))));
+  dag LE_A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 2))));
+  dag BE_A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 1))));
+  dag BE_A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 3))));
+}
+
 def FltToIntLoad {
   dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (extloadf32 xoaddr:$A)))));
 }
@@ -2969,4 +3013,55 @@
               (VMRGOW (COPY_TO_REGCLASS (MTVSRDD AnyExts.D, AnyExts.B), VSRC),
                       (COPY_TO_REGCLASS (MTVSRDD AnyExts.C, AnyExts.A), VSRC))>;
   }
+  // P9 Altivec instructions that can be used to build vectors.
+  // Adding them to PPCInstrVSX.td rather than PPCAltivecVSX.td to compete
+  // with complexities of existing build vector patterns in this file.
+  let Predicates = [HasP9Altivec, IsLittleEndian] in {
+    // The elements already sit at the little endian extend positions.
+    def : Pat<(v2i64 (build_vector WordToDWord.LE_A0, WordToDWord.LE_A1)),
+              (v2i64 (VEXTSW2D $A))>;
+    def : Pat<(v2i64 (build_vector HWordToDWord.LE_A0, HWordToDWord.LE_A1)),
+              (v2i64 (VEXTSH2D $A))>;
+    def : Pat<(v4i32 (build_vector HWordToWord.LE_A0, HWordToWord.LE_A1,
+                                   HWordToWord.LE_A2, HWordToWord.LE_A3)),
+              (v4i32 (VEXTSH2W $A))>;
+    def : Pat<(v4i32 (build_vector ByteToWord.LE_A0, ByteToWord.LE_A1,
+                                   ByteToWord.LE_A2, ByteToWord.LE_A3)),
+              (v4i32 (VEXTSB2W $A))>;
+    def : Pat<(v2i64 (build_vector ByteToDWord.LE_A0, ByteToDWord.LE_A1)),
+              (v2i64 (VEXTSB2D $A))>;
+  }
+
+  let Predicates = [HasP9Altivec, IsBigEndian] in {
+    // The elements already sit at the big endian extend positions.
+    def : Pat<(v2i64 (build_vector WordToDWord.BE_A0, WordToDWord.BE_A1)),
+              (v2i64 (VEXTSW2D $A))>;
+    def : Pat<(v2i64 (build_vector HWordToDWord.BE_A0, HWordToDWord.BE_A1)),
+              (v2i64 (VEXTSH2D $A))>;
+    def : Pat<(v4i32 (build_vector HWordToWord.BE_A0, HWordToWord.BE_A1,
+                                   HWordToWord.BE_A2, HWordToWord.BE_A3)),
+              (v4i32 (VEXTSH2W $A))>;
+    def : Pat<(v4i32 (build_vector ByteToWord.BE_A0, ByteToWord.BE_A1,
+                                   ByteToWord.BE_A2, ByteToWord.BE_A3)),
+              (v4i32 (VEXTSB2W $A))>;
+    def : Pat<(v2i64 (build_vector ByteToDWord.BE_A0, ByteToDWord.BE_A1)),
+              (v2i64 (VEXTSB2D $A))>;
+  }
+
+  // PPCSExtVElems is created by the DAG combine after the elements have been
+  // shuffled into the positions the instructions read for the current
+  // endianness, so one set of patterns serves both little and big endian.
+  let Predicates = [HasP9Altivec] in {
+    def : Pat<(v2i64 (PPCSExtVElems v16i8:$A)),
+              (v2i64 (VEXTSB2D $A))>;
+    def : Pat<(v2i64 (PPCSExtVElems v8i16:$A)),
+              (v2i64 (VEXTSH2D $A))>;
+    def : Pat<(v2i64 (PPCSExtVElems v4i32:$A)),
+              (v2i64 (VEXTSW2D $A))>;
+    def : Pat<(v4i32 (PPCSExtVElems v16i8:$A)),
+              (v4i32 (VEXTSB2W $A))>;
+    def : Pat<(v4i32 (PPCSExtVElems v8i16:$A)),
+              (v4i32 (VEXTSH2W $A))>;
+  }
+}
Index: 
test/CodeGen/PowerPC/vec_int_ext.ll
===================================================================
--- /dev/null
+++ test/CodeGen/PowerPC/vec_int_ext.ll
@@ -0,0 +1,232 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-gnu-linux -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-LE
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-gnu-linux -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-BE
+
+define <4 x i32> @vextsb2wLE(<16 x i8> %a) { ; elements 0, 4, 8, 12: no shuffle expected on LE, vperm on BE
+; CHECK-LE-LABEL: vextsb2wLE:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: vextsb2w 2, 2
+; CHECK-LE-NEXT: blr
+; CHECK-BE-LABEL: vextsb2wLE:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE: vperm 2, 2, 2, 3
+; CHECK-BE-NEXT: vextsb2w 2, 2
+; CHECK-BE-NEXT: blr
+
+entry:
+  %vecext = extractelement <16 x i8> %a, i32 0
+  %conv = sext i8 %vecext to i32
+  %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
+  %vecext1 = extractelement <16 x i8> %a, i32 4
+  %conv2 = sext i8 %vecext1 to i32
+  %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1
+  %vecext4 = extractelement <16 x i8> %a, i32 8
+  %conv5 = sext i8 %vecext4 to i32
+  %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2
+  %vecext7 = extractelement <16 x i8> %a, i32 12
+  %conv8 = sext i8 %vecext7 to i32
+  %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3
+  ret <4 x i32> %vecinit9
+}
+
+define <2 x i64> @vextsb2dLE(<16 x i8> %a) { ; elements 0, 8: no shuffle expected on LE, vperm on BE
+; CHECK-LE-LABEL: vextsb2dLE:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: vextsb2d 2, 2
+; CHECK-LE-NEXT: blr
+; CHECK-BE-LABEL: vextsb2dLE:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE: vperm 2, 2, 2, 3
+; CHECK-BE-NEXT: vextsb2d 2, 2
+; CHECK-BE-NEXT: blr
+
+entry:
+  %vecext = extractelement <16 x i8> %a, i32 0
+  %conv = sext i8 %vecext to i64
+  %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
+  %vecext1 = extractelement <16 x i8> %a, i32 8
+  %conv2 = sext i8 %vecext1 to i64
+  %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
+  ret <2 x i64> %vecinit3
+}
+
+define <4 x i32> @vextsh2wLE(<8 x i16> %a) { ; elements 0, 2, 4, 6: no shuffle expected on LE, vperm on BE
+; CHECK-LE-LABEL: vextsh2wLE:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: vextsh2w 2, 2
+; CHECK-LE-NEXT: blr
+; CHECK-BE-LABEL: vextsh2wLE:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE: vperm 2, 2, 2, 3
+; CHECK-BE-NEXT: vextsh2w 2, 2
+; CHECK-BE-NEXT: blr
+
+entry:
+  %vecext = extractelement <8 x i16> %a, i32 0
+  %conv = sext i16 %vecext to i32
+  %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
+  %vecext1 = extractelement <8 x i16> %a, i32 2
+  %conv2 = sext i16 %vecext1 to i32
+  %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1
+  %vecext4 = extractelement <8 x i16> %a, i32 4
+  %conv5 = sext i16 %vecext4 to i32
+  %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2
+  %vecext7 = extractelement <8 x i16> %a, i32 6
+  %conv8 = sext i16 %vecext7 to i32
+  %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3
+  ret <4 x i32> %vecinit9
+}
+
+define <2 x i64> @vextsh2dLE(<8 x i16> %a) { ; elements 0, 4: no shuffle expected on LE, vperm on BE
+; CHECK-LE-LABEL: vextsh2dLE:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: vextsh2d 2, 2
+; CHECK-LE-NEXT: blr
+; CHECK-BE-LABEL: vextsh2dLE:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE: vperm 2, 2, 2, 3
+; CHECK-BE-NEXT: vextsh2d 2, 2
+; CHECK-BE-NEXT: blr
+
+entry:
+  %vecext = extractelement <8 x i16> %a, i32 0
+  %conv = sext i16 %vecext to i64
+  %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
+  %vecext1 = extractelement <8 x i16> %a, i32 4
+  %conv2 = sext i16 %vecext1 to i64
+  %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
+  ret <2 x i64> %vecinit3
+}
+
+define <2 x i64> @vextsw2dLE(<4 x i32> %a) { ; elements 0, 2: no shuffle expected on LE, vmrgew on BE
+; CHECK-LE-LABEL: vextsw2dLE:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: vextsw2d 2, 2
+; CHECK-LE-NEXT: blr
+; CHECK-BE-LABEL: vextsw2dLE:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE: vmrgew
+; CHECK-BE-NEXT: vextsw2d 2, 2
+; CHECK-BE-NEXT: blr
+
+entry:
+  %vecext = extractelement <4 x i32> %a, i32 0
+  %conv = sext i32 %vecext to i64
+  %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
+  %vecext1 = extractelement <4 x i32> %a, i32 2
+  %conv2 = sext i32 %vecext1 to i64
+  %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
+  ret <2 x i64> %vecinit3
+}
+
+define <4 x i32> @vextsb2wBE(<16 x i8> %a) { ; elements 3, 7, 11, 15: no shuffle expected on BE, vsldoi on LE
+; CHECK-BE-LABEL: vextsb2wBE:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE-NEXT: vextsb2w 2, 2
+; CHECK-BE-NEXT: blr
+; CHECK-LE-LABEL: vextsb2wBE:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: vsldoi 2, 2, 2, 13
+; CHECK-LE-NEXT: vextsb2w 2, 2
+; CHECK-LE-NEXT: blr
+entry:
+  %vecext = extractelement <16 x i8> %a, i32 3
+  %conv = sext i8 %vecext to i32
+  %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
+  %vecext1 = extractelement <16 x i8> %a, i32 7
+  %conv2 = sext i8 %vecext1 to i32
+  %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1
+  %vecext4 = extractelement <16 x i8> %a, i32 11
+  %conv5 = sext i8 %vecext4 to i32
+  %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2
+  %vecext7 = extractelement <16 x i8> %a, i32 15
+  %conv8 = sext i8 %vecext7 to i32
+  %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3
+  ret <4 x i32> %vecinit9
+}
+
+define <2 x i64> @vextsb2dBE(<16 x i8> %a) { ; elements 7, 15: no shuffle expected on BE, vsldoi on LE
+; CHECK-BE-LABEL: vextsb2dBE:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE-NEXT: vextsb2d 2, 2
+; CHECK-BE-NEXT: blr
+; CHECK-LE-LABEL: vextsb2dBE:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: vsldoi 2, 2, 2, 9
+; CHECK-LE-NEXT: vextsb2d 2, 2
+; CHECK-LE-NEXT: blr
+entry:
+  %vecext = extractelement <16 x i8> %a, i32 7
+  %conv = sext i8 %vecext to i64
+  %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
+  %vecext1 = extractelement <16 x i8> %a, i32 15
+  %conv2 = sext i8 %vecext1 to i64
+  %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
+  ret <2 x i64> %vecinit3
+}
+
+define <4 x i32> @vextsh2wBE(<8 x i16> %a) { ; elements 1, 3, 5, 7: no shuffle expected on BE, vsldoi on LE
+; CHECK-BE-LABEL: vextsh2wBE:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE-NEXT: vextsh2w 2, 2
+; CHECK-BE-NEXT: blr
+; CHECK-LE-LABEL: vextsh2wBE:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: vsldoi 2, 2, 2, 14
+; CHECK-LE-NEXT: vextsh2w 2, 2
+; CHECK-LE-NEXT: blr
+entry:
+  %vecext = extractelement <8 x i16> %a, i32 1
+  %conv = sext i16 %vecext to i32
+  %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
+  %vecext1 = extractelement <8 x i16> %a, i32 3
+  %conv2 = sext i16 %vecext1 to i32
+  %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1
+  %vecext4 = extractelement <8 x i16> %a, i32 5
+  %conv5 = sext i16 %vecext4 to i32
+  %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2
+  %vecext7 = extractelement <8 x i16> %a, i32 7
+  %conv8 = sext i16 %vecext7 to i32
+  %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3
+  ret <4 x i32> %vecinit9
+}
+
+define <2 x i64> @vextsh2dBE(<8 x i16> %a) { ; elements 3, 7: no shuffle expected on BE, vsldoi on LE
+; CHECK-BE-LABEL: vextsh2dBE:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE-NEXT: vextsh2d 2, 2
+; CHECK-BE-NEXT: blr
+; CHECK-LE-LABEL: vextsh2dBE:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: vsldoi 2, 2, 2, 10
+; CHECK-LE-NEXT: vextsh2d 2, 2
+; CHECK-LE-NEXT: blr
+entry:
+  %vecext = extractelement <8 x i16> %a, i32 3
+  %conv = sext i16 %vecext to i64
+  %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
+  %vecext1 = extractelement <8 x i16> %a, i32 7
+  %conv2 = sext i16 %vecext1 to i64
+  %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
+  ret <2 x i64> %vecinit3
+}
+
+define <2 x i64> @vextsw2dBE(<4 x i32> %a) { ; elements 1, 3: no shuffle expected on BE, vsldoi on LE
+; CHECK-BE-LABEL: vextsw2dBE:
+; CHECK-BE: # BB#0: # %entry
+; CHECK-BE-NEXT: vextsw2d 2, 2
+; CHECK-BE-NEXT: blr
+; CHECK-LE-LABEL: vextsw2dBE:
+; CHECK-LE: # BB#0: # %entry
+; CHECK-LE-NEXT: vsldoi 2, 2, 2, 12
+; CHECK-LE-NEXT: vextsw2d 2, 2
+; CHECK-LE-NEXT: blr
+entry:
+  %vecext = extractelement <4 x i32> %a, i32 1
+  %conv = sext i32 %vecext to i64
+  %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
+  %vecext1 = extractelement <4 x i32> %a, i32 3
+  %conv2 = sext i32 %vecext1 to i64
+  %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
+  ret <2 x i64> %vecinit3
+}