Index: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
@@ -1072,7 +1072,14 @@
     SDValue
     combineElementTruncationToVectorTruncation(SDNode *N,
                                                DAGCombinerInfo &DCI) const;
-  };
+
+    /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be
+    /// handled by the VINSERTH instruction introduced in ISA 3.0. This is
+    /// essentially any shuffle of v8i16 vectors that just inserts one element
+    /// from one vector into the other.
+    SDValue lowerToVINSERTH(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;
+
+  }; // end class PPCTargetLowering
 
   namespace PPC {
 
Index: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -114,6 +114,8 @@
 STATISTIC(NumTailCalls, "Number of tail calls");
 STATISTIC(NumSiblingCalls, "Number of sibling calls");
 
+static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
+
 // FIXME: Remove this once the bug has been fixed!
 extern cl::opt<bool> ANDIGlueBug;
 
@@ -7886,6 +7888,125 @@
   return DAG.getNode(ISD::BITCAST, dl, VT, T);
 }
 
+/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
+/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
+/// SDValue.
+///
+/// A shuffle qualifies when it works on whole half-words and its mask is the
+/// in-order element list of one operand with a single half-word replaced by
+/// an element of the other operand. When the second operand is undef, the
+/// replacement must be half-word 3 (big endian) or 4 (little endian) of the
+/// first operand, the one that needs no shift before the insert.
+SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
+                                           SelectionDAG &DAG) const {
+  const unsigned NumHalfWords = 8;
+  const unsigned BytesInVector = NumHalfWords * 2;
+  // Check that the shuffle is on half-words.
+  if (!isNByteElemShuffleMask(N, 2, 1))
+    return SDValue();
+
+  const bool IsLE = Subtarget.isLittleEndian();
+  SDLoc dl(N);
+  SDValue V1 = N->getOperand(0);
+  SDValue V2 = N->getOperand(1);
+  unsigned ShiftElts = 0, InsertAtByte = 0;
+  bool Swap = false;
+
+  // Shifts required to get the half-word we want at element 3.
+  static const unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
+  static const unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
+
+  // In-order half-word indices of the first ([0,7]) and second ([8,15])
+  // operand, one nibble per element with element 0 in the top nibble.
+  const uint32_t OriginalOrderLow = 0x01234567;
+  const uint32_t OriginalOrderHigh = 0x89ABCDEF;
+
+  // Now we look at mask elements 0,2,4,6,8,10,12,14.  Pack the mask into a
+  // 32-bit space, only need 4-bit nibbles per element.
+  uint32_t Mask = 0;
+  for (unsigned i = 0; i < NumHalfWords; ++i) {
+    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
+    Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
+  }
+
+  // For each mask element, find out if we're just inserting something
+  // from V2 into V1 or vice versa.  Possible permutations inserting an element
+  // from V2 into V1:
+  //   X, 1, 2, 3, 4, 5, 6, 7
+  //   0, X, 2, 3, 4, 5, 6, 7
+  //   0, 1, X, 3, 4, 5, 6, 7
+  //   0, 1, 2, X, 4, 5, 6, 7
+  //   0, 1, 2, 3, X, 5, 6, 7
+  //   0, 1, 2, 3, 4, X, 6, 7
+  //   0, 1, 2, 3, 4, 5, X, 7
+  //   0, 1, 2, 3, 4, 5, 6, X
+  // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
+
+  bool FoundCandidate = false;
+  // Go through the mask of half-words to find an element that's being moved
+  // from one vector to the other.
+  for (unsigned i = 0; i < NumHalfWords; ++i) {
+    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
+    uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
+    // Use an unsigned literal: for i == 0 the shift is 28, which would move
+    // bits into the sign bit of a plain int.
+    uint32_t MaskOtherElts = ~(0xFu << MaskShift);
+    uint32_t TargetOrder = 0x0;
+
+    // If both vector operands for the shuffle are the same vector, the mask
+    // will contain only elements from the first one and the second one will be
+    // undef.
+    if (V2.isUndef()) {
+      // No shift and no swap here, so ShiftElts and Swap keep their initial
+      // values.
+      unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
+      TargetOrder = OriginalOrderLow;
+      // Skip if not the correct element or mask of other elements don't equal
+      // to our expected order.
+      if (MaskOneElt == VINSERTHSrcElem &&
+          (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
+        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
+        FoundCandidate = true;
+        break;
+      }
+    } else { // If both operands are defined.
+      // Target order is [8,15] if the current mask is between [0,7].
+      TargetOrder =
+          (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
+      // Skip if mask of other elements don't equal our expected order.
+      if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
+        // We only need the last 3 bits for the number of shifts.
+        ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
+                         : BigEndianShifts[MaskOneElt & 0x7];
+        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
+        Swap = MaskOneElt < NumHalfWords;
+        FoundCandidate = true;
+        break;
+      }
+    }
+  }
+
+  if (!FoundCandidate)
+    return SDValue();
+
+  // Candidate found, construct the proper SDAG sequence with VINSERTH,
+  // optionally with VECSHL if shift is required.
+  if (Swap)
+    std::swap(V1, V2);
+  if (V2.isUndef())
+    V2 = V1;
+  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
+  if (ShiftElts) {
+    // Double ShiftElts because we're left shifting on v16i8 type.
+    V2 = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
+                     DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
+  }
+  SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
+  SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
+                            DAG.getConstant(InsertAtByte, dl, MVT::i32));
+  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
+}
+
 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
 /// is a shuffle we can handle in a single instruction, return it.  Otherwise,
 /// return the code it can be lowered into.  Worst case, it can always be
@@ -7920,6 +8041,11 @@
     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
   }
 
+  if (Subtarget.hasP9Altivec()) {
+    SDValue NewISDNode = lowerToVINSERTH(SVOp, DAG);
+    if (NewISDNode)
+      return NewISDNode;
+  }
 
   if (Subtarget.hasVSX() &&
       PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
Index: llvm/trunk/lib/Target/PowerPC/PPCInstrAltivec.td
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrAltivec.td
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -477,10 +477,10 @@
 def VSEL : VA1a_Int_Ty<42, "vsel", int_ppc_altivec_vsel, v4i32>;
 
 // Shuffles.
-def VSLDOI : VAForm_2<44, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, u5imm:$SH),
+def VSLDOI : VAForm_2<44, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, u4imm:$SH),
                       "vsldoi $vD, $vA, $vB, $SH", IIC_VecFP,
-                      [(set v16i8:$vD,
-                         (vsldoi_shuffle:$SH v16i8:$vA, v16i8:$vB))]>;
+                      [(set v16i8:$vD,
+                         (PPCvecshl v16i8:$vA, v16i8:$vB, imm32SExt16:$SH))]>;
 
 // VX-Form instructions.  AltiVec arithmetic ops.
let isCommutable = 1 in {
@@ -908,6 +908,9 @@
           (VPKUWUM $vA, $vA)>;
 def:Pat<(vpkuhum_unary_shuffle v16i8:$vA, undef),
         (VPKUHUM $vA, $vA)>;
+def:Pat<(vsldoi_shuffle:$SH v16i8:$vA, v16i8:$vB),
+        (VSLDOI v16i8:$vA, v16i8:$vB, (VSLDOI_get_imm $SH))>;
+
 
 // Match vsldoi(y,x), vpkuwum(y,x), vpkuhum(y,x), i.e., swapped operands.
 // These fragments are matched for little-endian, where the inputs must
@@ -1310,7 +1313,12 @@
 
 // Vector Insert Element Instructions
 def VINSERTB : VX1_VT5_UIM5_VB5<781, "vinsertb", []>;
-def VINSERTH : VX1_VT5_UIM5_VB5<845, "vinserth", []>;
+def VINSERTH : VXForm_1<845, (outs vrrc:$vD),
+                        (ins vrrc:$vDi, u4imm:$UIM, vrrc:$vB),
+                        "vinserth $vD, $vB, $UIM", IIC_VecGeneral,
+                        [(set v8i16:$vD, (PPCvecinsert v8i16:$vDi, v8i16:$vB,
+                                                      imm32SExt16:$UIM))]>,
+                        RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
 def VINSERTW : VX1_VT5_UIM5_VB5<909, "vinsertw", []>;
 def VINSERTD : VX1_VT5_UIM5_VB5<973, "vinsertd", []>;
 
Index: llvm/trunk/test/CodeGen/PowerPC/p9-vinsert-vextract.ll
===================================================================
--- llvm/trunk/test/CodeGen/PowerPC/p9-vinsert-vextract.ll
+++ llvm/trunk/test/CodeGen/PowerPC/p9-vinsert-vextract.ll
@@ -0,0 +1,300 @@
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -O0 -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-BE
+; RUN: llc -O0 -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-BE
+
+; The following testcases take one halfword element from the second vector and
+; insert it at various locations in the first vector.
+define <8 x i16> @shuffle_vector_halfword_0_8(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_0_8
+; CHECK: vsldoi 3, 3, 3, 8
+; CHECK: vinserth 2, 3, 14
+; CHECK-BE-LABEL: shuffle_vector_halfword_0_8
+; CHECK-BE: vsldoi 3, 3, 3, 10
+; CHECK-BE: vinserth 2, 3, 0
+  %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_1_15(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_1_15
+; CHECK: vsldoi 3, 3, 3, 10
+; CHECK: vinserth 2, 3, 12
+; CHECK-BE-LABEL: shuffle_vector_halfword_1_15
+; CHECK-BE: vsldoi 3, 3, 3, 8
+; CHECK-BE: vinserth 2, 3, 2
+  %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 15, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_2_9(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_2_9
+; CHECK: vsldoi 3, 3, 3, 6
+; CHECK: vinserth 2, 3, 10
+; CHECK-BE-LABEL: shuffle_vector_halfword_2_9
+; CHECK-BE: vsldoi 3, 3, 3, 12
+; CHECK-BE: vinserth 2, 3, 4
+  %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 9, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_3_13(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_3_13
+; CHECK: vsldoi 3, 3, 3, 14
+; CHECK: vinserth 2, 3, 8
+; CHECK-BE-LABEL: shuffle_vector_halfword_3_13
+; CHECK-BE: vsldoi 3, 3, 3, 4
+; CHECK-BE: vinserth 2, 3, 6
+  %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_4_10(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_4_10
+; CHECK: vsldoi 3, 3, 3, 4
+; CHECK: vinserth 2, 3, 6
+; CHECK-BE-LABEL: shuffle_vector_halfword_4_10
+; CHECK-BE: vsldoi 3, 3, 3, 14
+; CHECK-BE: vinserth 2, 3, 8
+  %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 10, i32 5, i32 6, i32 7>
+  ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_5_14(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_5_14
+; CHECK: vsldoi 3, 3, 3, 12
+; CHECK: vinserth 2, 3, 4
+; CHECK-BE-LABEL: shuffle_vector_halfword_5_14
+; CHECK-BE: vsldoi 3, 3, 3, 6
+; CHECK-BE: vinserth 2, 3, 10
+  %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 14, i32 6, i32 7>
+  ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_6_11(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_6_11
+; CHECK: vsldoi 3, 3, 3, 2
+; CHECK: vinserth 2, 3, 2
+; CHECK-BE-LABEL: shuffle_vector_halfword_6_11
+; CHECK-BE: vinserth 2, 3, 12
+  %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 11, i32 7>
+  ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_7_12(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_7_12
+; CHECK: vinserth 2, 3, 0
+; CHECK-BE-LABEL: shuffle_vector_halfword_7_12
+; CHECK-BE: vsldoi 3, 3, 3, 2
+; CHECK-BE: vinserth 2, 3, 14
+  %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 12>
+  ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_8_1(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_8_1
+; CHECK: vsldoi 2, 2, 2, 6
+; CHECK: vinserth 3, 2, 14
+; CHECK: vmr 2, 3
+; CHECK-BE-LABEL: shuffle_vector_halfword_8_1
+; CHECK-BE: vsldoi 2, 2, 2, 12
+; CHECK-BE: vinserth 3, 2, 0
+; CHECK-BE: vmr 2, 3
+  %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <8 x i16> %vecins
+}
+
+; The following testcases take one halfword element from the first vector and
+; insert it at various locations in the second vector.
+define <8 x i16> @shuffle_vector_halfword_9_7(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_9_7
+; CHECK: vsldoi 2, 2, 2, 10
+; CHECK: vinserth 3, 2, 12
+; CHECK: vmr 2, 3
+; CHECK-BE-LABEL: shuffle_vector_halfword_9_7
+; CHECK-BE: vsldoi 2, 2, 2, 8
+; CHECK-BE: vinserth 3, 2, 2
+; CHECK-BE: vmr 2, 3
+  %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 7, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_10_4(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_10_4
+; CHECK: vinserth 3, 2, 10
+; CHECK: vmr 2, 3
+; CHECK-BE-LABEL: shuffle_vector_halfword_10_4
+; CHECK-BE: vsldoi 2, 2, 2, 2
+; CHECK-BE: vinserth 3, 2, 4
+; CHECK-BE: vmr 2, 3
+  %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 4, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_11_2(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_11_2
+; CHECK: vsldoi 2, 2, 2, 4
+; CHECK: vinserth 3, 2, 8
+; CHECK: vmr 2, 3
+; CHECK-BE-LABEL: shuffle_vector_halfword_11_2
+; CHECK-BE: vsldoi 2, 2, 2, 14
+; CHECK-BE: vinserth 3, 2, 6
+; CHECK-BE: vmr 2, 3
+  %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 2, i32 12, i32 13, i32 14, i32 15>
+  ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_12_6(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_12_6
+; CHECK: vsldoi 2, 2, 2, 12
+; CHECK: vinserth 3, 2, 6
+; CHECK: vmr 2, 3
+; CHECK-BE-LABEL: shuffle_vector_halfword_12_6
+; CHECK-BE: vsldoi 2, 2, 2, 6
+; CHECK-BE: vinserth 3, 2, 8
+; CHECK-BE: vmr 2, 3
+  %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 6, i32 13, i32 14, i32 15>
+  ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_13_3(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_13_3
+; CHECK: vsldoi 2, 2, 2, 2
+; CHECK: vinserth 3, 2, 4
+; CHECK: vmr 2, 3
+; CHECK-BE-LABEL: shuffle_vector_halfword_13_3
+; CHECK-BE: vinserth 3, 2, 10
+; CHECK-BE: vmr 2, 3
+  %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 3, i32 14, i32 15>
+  ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_14_5(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_14_5
+; CHECK: vsldoi 2, 2, 2, 14
+; CHECK: vinserth 3, 2, 2
+; CHECK: vmr 2, 3
+; CHECK-BE-LABEL: shuffle_vector_halfword_14_5
+; CHECK-BE: vsldoi 2, 2, 2, 4
+; CHECK-BE: vinserth 3, 2, 12
+; CHECK-BE: vmr 2, 3
+  %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 5, i32 15>
+  ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_15_0(<8 x i16> %a, <8 x i16> %b) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_15_0
+; CHECK: vsldoi 2, 2, 2, 8
+; CHECK: vinserth 3, 2, 0
+; CHECK: vmr 2, 3
+; CHECK-BE-LABEL: shuffle_vector_halfword_15_0
+; CHECK-BE: vsldoi 2, 2, 2, 10
+; CHECK-BE: vinserth 3, 2, 14
+; CHECK-BE: vmr 2, 3
+  %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
+  ret <8 x i16> %vecins
+}
+
+; The following testcases use the same vector in both arguments of the
+; shufflevector.  If halfword element 3 in BE mode (or 4 in LE mode) is the one
+; we're attempting to insert, then we can use the vector insert instruction.
+define <8 x i16> @shuffle_vector_halfword_0_4(<8 x i16> %a) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_0_4
+; CHECK: vinserth 2, 2, 14
+; CHECK-BE-LABEL: shuffle_vector_halfword_0_4
+; CHECK-BE-NOT: vinserth
+  %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 4, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_1_3(<8 x i16> %a) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_1_3
+; CHECK-NOT: vinserth
+; CHECK-BE-LABEL: shuffle_vector_halfword_1_3
+; CHECK-BE: vinserth 2, 2, 2
+  %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 3, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_2_3(<8 x i16> %a) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_2_3
+; CHECK-NOT: vinserth
+; CHECK-BE-LABEL: shuffle_vector_halfword_2_3
+; CHECK-BE: vinserth 2, 2, 4
+  %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_3_4(<8 x i16> %a) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_3_4
+; CHECK: vinserth 2, 2, 8
+; CHECK-BE-LABEL: shuffle_vector_halfword_3_4
+; CHECK-BE-NOT: vinserth
+  %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 4, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_4_3(<8 x i16> %a) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_4_3
+; CHECK-NOT: vinserth
+; CHECK-BE-LABEL: shuffle_vector_halfword_4_3
+; CHECK-BE: vinserth 2, 2, 8
+  %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 3, i32 5, i32 6, i32 7>
+  ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_5_3(<8 x i16> %a) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_5_3
+; CHECK-NOT: vinserth
+; CHECK-BE-LABEL: shuffle_vector_halfword_5_3
+; CHECK-BE: vinserth 2, 2, 10
+  %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 3, i32 6, i32 7>
+  ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_6_4(<8 x i16> %a) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_6_4
+; CHECK: vinserth 2, 2, 2
+; CHECK-BE-LABEL: shuffle_vector_halfword_6_4
+; CHECK-BE-NOT: vinserth
+  %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 4, i32 7>
+  ret <8 x i16> %vecins
+}
+
+define <8 x i16> @shuffle_vector_halfword_7_4(<8 x i16> %a) {
+entry:
+; CHECK-LABEL: shuffle_vector_halfword_7_4
+; CHECK: vinserth 2, 2, 0
+; CHECK-BE-LABEL: shuffle_vector_halfword_7_4
+; CHECK-BE-NOT: vinserth
+  %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4>
+  ret <8 x i16> %vecins
+}
+