Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -771,6 +771,11 @@ setOperationAction(ISD::SRL, MVT::v1i128, Legal); setOperationAction(ISD::SRA, MVT::v1i128, Expand); } + + if (Subtarget.hasP9Altivec()) { + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom); + } } if (Subtarget.hasQPX()) { @@ -8746,11 +8751,30 @@ SelectionDAG &DAG) const { assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Should only be called for ISD::INSERT_VECTOR_ELT"); + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2)); // We have legal lowering for constant indices but not for variable ones. - if (C) - return Op; - return SDValue(); + if (!C) + return SDValue(); + + EVT VT = Op.getValueType(); + SDLoc dl(Op); + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types. 
+ if (VT == MVT::v8i16 || VT == MVT::v16i8) { + SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2); + unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8; + unsigned InsertAtElement = C->getZExtValue(); + unsigned InsertAtByte = InsertAtElement * BytesInEachElement; + // Mirror the byte offset within the 16-byte vector on little endian. + if (Subtarget.isLittleEndian()) { + InsertAtByte = (16 - BytesInEachElement) - InsertAtByte; + } + return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz, + DAG.getConstant(InsertAtByte, dl, MVT::i32)); + } + return Op; } SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, Index: lib/Target/PowerPC/PPCInstrVSX.td =================================================================== --- lib/Target/PowerPC/PPCInstrVSX.td +++ lib/Target/PowerPC/PPCInstrVSX.td @@ -1767,6 +1767,32 @@ (COPY_TO_REGCLASS $S, VRRC), BE_VDWORD_PERM_VEC); dag BE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS BE_VDOUBLE_PERMUTE, VSRC); + + dag P9ALTIVEC_LE_HALF_3 = + (EXTRACT_SUBREG (MFVSRD (EXTRACT_SUBREG (VEXTRACTUH 8, $S), sub_64)), sub_32); + dag P9ALTIVEC_LE_HALF_2 = + (EXTRACT_SUBREG (MFVSRD (EXTRACT_SUBREG (VEXTRACTUH 10, $S), sub_64)), sub_32); + dag P9ALTIVEC_LE_HALF_1 = + (EXTRACT_SUBREG (MFVSRD (EXTRACT_SUBREG (VEXTRACTUH 12, $S), sub_64)), sub_32); + dag P9ALTIVEC_LE_HALF_0 = + (EXTRACT_SUBREG (MFVSRD (EXTRACT_SUBREG (VEXTRACTUH 14, $S), sub_64)), sub_32); + + dag P9ALTIVEC_LE_BYTE_7 = + (EXTRACT_SUBREG (MFVSRD (EXTRACT_SUBREG (VEXTRACTUB 8, $S), sub_64)), sub_32); + dag P9ALTIVEC_LE_BYTE_6 = + (EXTRACT_SUBREG (MFVSRD (EXTRACT_SUBREG (VEXTRACTUB 9, $S), sub_64)), sub_32); + dag P9ALTIVEC_LE_BYTE_5 = + (EXTRACT_SUBREG (MFVSRD (EXTRACT_SUBREG (VEXTRACTUB 10, $S), sub_64)), sub_32); + dag P9ALTIVEC_LE_BYTE_4 = + (EXTRACT_SUBREG (MFVSRD (EXTRACT_SUBREG (VEXTRACTUB 11, $S), sub_64)), sub_32); + dag P9ALTIVEC_LE_BYTE_3 = + (EXTRACT_SUBREG (MFVSRD (EXTRACT_SUBREG (VEXTRACTUB 12, $S), sub_64)), sub_32); + dag P9ALTIVEC_LE_BYTE_2 = + (EXTRACT_SUBREG (MFVSRD (EXTRACT_SUBREG (VEXTRACTUB 13, $S), 
sub_64)), sub_32); + dag P9ALTIVEC_LE_BYTE_1 = + (EXTRACT_SUBREG (MFVSRD (EXTRACT_SUBREG (VEXTRACTUB 14, $S), sub_64)), sub_32); + dag P9ALTIVEC_LE_BYTE_0 = + (EXTRACT_SUBREG (MFVSRD (EXTRACT_SUBREG (VEXTRACTUB 15, $S), sub_64)), sub_32); } let AddedComplexity = 400 in { @@ -1877,6 +1903,64 @@ (i64 VectorExtractions.BE_VARIABLE_DWORD)>; } // IsBigEndian, HasDirectMove +// Better vector_extract patterns when extracting from dword[1] of VSR (BE). +let Predicates = [IsBigEndian, HasP9Altivec] in { + def : Pat<(i32 (vector_extract v8i16:$S, 4)), + (i32 VectorExtractions.P9ALTIVEC_LE_HALF_3)>; + def : Pat<(i32 (vector_extract v8i16:$S, 5)), + (i32 VectorExtractions.P9ALTIVEC_LE_HALF_2)>; + def : Pat<(i32 (vector_extract v8i16:$S, 6)), + (i32 VectorExtractions.P9ALTIVEC_LE_HALF_1)>; + def : Pat<(i32 (vector_extract v8i16:$S, 7)), + (i32 VectorExtractions.P9ALTIVEC_LE_HALF_0)>; + + def : Pat<(i32 (vector_extract v16i8:$S, 8)), + (i32 VectorExtractions.P9ALTIVEC_LE_BYTE_7)>; + def : Pat<(i32 (vector_extract v16i8:$S, 9)), + (i32 VectorExtractions.P9ALTIVEC_LE_BYTE_6)>; + def : Pat<(i32 (vector_extract v16i8:$S, 10)), + (i32 VectorExtractions.P9ALTIVEC_LE_BYTE_5)>; + def : Pat<(i32 (vector_extract v16i8:$S, 11)), + (i32 VectorExtractions.P9ALTIVEC_LE_BYTE_4)>; + def : Pat<(i32 (vector_extract v16i8:$S, 12)), + (i32 VectorExtractions.P9ALTIVEC_LE_BYTE_3)>; + def : Pat<(i32 (vector_extract v16i8:$S, 13)), + (i32 VectorExtractions.P9ALTIVEC_LE_BYTE_2)>; + def : Pat<(i32 (vector_extract v16i8:$S, 14)), + (i32 VectorExtractions.P9ALTIVEC_LE_BYTE_1)>; + def : Pat<(i32 (vector_extract v16i8:$S, 15)), + (i32 VectorExtractions.P9ALTIVEC_LE_BYTE_0)>; +} // IsBigEndian, HasP9Altivec + +// Better vector_extract patterns when extracting from dword[1] of VSR (LE). 
+let Predicates = [IsLittleEndian, HasP9Altivec] in { + def : Pat<(i32 (vector_extract v8i16:$S, 0)), + (i32 VectorExtractions.P9ALTIVEC_LE_HALF_0)>; + def : Pat<(i32 (vector_extract v8i16:$S, 1)), + (i32 VectorExtractions.P9ALTIVEC_LE_HALF_1)>; + def : Pat<(i32 (vector_extract v8i16:$S, 2)), + (i32 VectorExtractions.P9ALTIVEC_LE_HALF_2)>; + def : Pat<(i32 (vector_extract v8i16:$S, 3)), + (i32 VectorExtractions.P9ALTIVEC_LE_HALF_3)>; + + def : Pat<(i32 (vector_extract v16i8:$S, 0)), + (i32 VectorExtractions.P9ALTIVEC_LE_BYTE_0)>; + def : Pat<(i32 (vector_extract v16i8:$S, 1)), + (i32 VectorExtractions.P9ALTIVEC_LE_BYTE_1)>; + def : Pat<(i32 (vector_extract v16i8:$S, 2)), + (i32 VectorExtractions.P9ALTIVEC_LE_BYTE_2)>; + def : Pat<(i32 (vector_extract v16i8:$S, 3)), + (i32 VectorExtractions.P9ALTIVEC_LE_BYTE_3)>; + def : Pat<(i32 (vector_extract v16i8:$S, 4)), + (i32 VectorExtractions.P9ALTIVEC_LE_BYTE_4)>; + def : Pat<(i32 (vector_extract v16i8:$S, 5)), + (i32 VectorExtractions.P9ALTIVEC_LE_BYTE_5)>; + def : Pat<(i32 (vector_extract v16i8:$S, 6)), + (i32 VectorExtractions.P9ALTIVEC_LE_BYTE_6)>; + def : Pat<(i32 (vector_extract v16i8:$S, 7)), + (i32 VectorExtractions.P9ALTIVEC_LE_BYTE_7)>; +} // IsLittleEndian, HasP9Altivec + // v4f32 scalar <-> vector conversions (LE) let Predicates = [IsLittleEndian, HasP8Vector] in { def : Pat<(v4f32 (scalar_to_vector f32:$A)), @@ -2458,6 +2542,13 @@ UseVSXReg; } // mayStore + // Alternate patterns for PPCmtvsrz where the result type is v8i16 or v16i8 + // instead of f64; both select MTVSRWZ on the i32 input. + def : Pat<(v8i16 (PPCmtvsrz i32:$A)), + (v8i16 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64))>; + def : Pat<(v16i8 (PPCmtvsrz i32:$A)), + (v16i8 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64))>; + // Patterns for which instructions from ISA 3.0 are a better match let Predicates = [IsLittleEndian, HasP9Vector] in { def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 0))))), Index: test/CodeGen/PowerPC/p9-vinsert-vextract.ll 
=================================================================== --- test/CodeGen/PowerPC/p9-vinsert-vextract.ll +++ test/CodeGen/PowerPC/p9-vinsert-vextract.ll @@ -876,3 +876,493 @@ %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> ret <16 x i8> %vecins } + +; The following tests insert one halfword element at each constant index. We +; should always use 'vinserth' (byte offset 2*idx on BE, 14 - 2*idx on LE). +define <8 x i16> @insert_halfword_0(<8 x i16> %a, i16 %b) { +entry: +; CHECK-LABEL: insert_halfword_0 +; CHECK: vinserth 2, 3, 14 +; CHECK-BE-LABEL: insert_halfword_0 +; CHECK-BE: vinserth 2, 3, 0 + %vecins = insertelement <8 x i16> %a, i16 %b, i32 0 + ret <8 x i16> %vecins +} + +define <8 x i16> @insert_halfword_1(<8 x i16> %a, i16 %b) { +entry: +; CHECK-LABEL: insert_halfword_1 +; CHECK: vinserth 2, 3, 12 +; CHECK-BE-LABEL: insert_halfword_1 +; CHECK-BE: vinserth 2, 3, 2 + %vecins = insertelement <8 x i16> %a, i16 %b, i32 1 + ret <8 x i16> %vecins +} + +define <8 x i16> @insert_halfword_2(<8 x i16> %a, i16 %b) { +entry: +; CHECK-LABEL: insert_halfword_2 +; CHECK: vinserth 2, 3, 10 +; CHECK-BE-LABEL: insert_halfword_2 +; CHECK-BE: vinserth 2, 3, 4 + %vecins = insertelement <8 x i16> %a, i16 %b, i32 2 + ret <8 x i16> %vecins +} + +define <8 x i16> @insert_halfword_3(<8 x i16> %a, i16 %b) { +entry: +; CHECK-LABEL: insert_halfword_3 +; CHECK: vinserth 2, 3, 8 +; CHECK-BE-LABEL: insert_halfword_3 +; CHECK-BE: vinserth 2, 3, 6 + %vecins = insertelement <8 x i16> %a, i16 %b, i32 3 + ret <8 x i16> %vecins +} + +define <8 x i16> @insert_halfword_4(<8 x i16> %a, i16 %b) { +entry: +; CHECK-LABEL: insert_halfword_4 +; CHECK: vinserth 2, 3, 6 +; CHECK-BE-LABEL: insert_halfword_4 +; CHECK-BE: vinserth 2, 3, 8 + %vecins = insertelement <8 x i16> %a, i16 %b, i32 4 + ret <8 x i16> %vecins +} + +define <8 x i16> @insert_halfword_5(<8 x i16> %a, i16 %b) { +entry: +; CHECK-LABEL: insert_halfword_5 +; CHECK: vinserth 2, 3, 4 +; CHECK-BE-LABEL: insert_halfword_5 +; CHECK-BE: vinserth 
2, 3, 10 + %vecins = insertelement <8 x i16> %a, i16 %b, i32 5 + ret <8 x i16> %vecins +} + +define <8 x i16> @insert_halfword_6(<8 x i16> %a, i16 %b) { +entry: +; CHECK-LABEL: insert_halfword_6 +; CHECK: vinserth 2, 3, 2 +; CHECK-BE-LABEL: insert_halfword_6 +; CHECK-BE: vinserth 2, 3, 12 + %vecins = insertelement <8 x i16> %a, i16 %b, i32 6 + ret <8 x i16> %vecins +} + +define <8 x i16> @insert_halfword_7(<8 x i16> %a, i16 %b) { +entry: +; CHECK-LABEL: insert_halfword_7 +; CHECK: vinserth 2, 3, 0 +; CHECK-BE-LABEL: insert_halfword_7 +; CHECK-BE: vinserth 2, 3, 14 + %vecins = insertelement <8 x i16> %a, i16 %b, i32 7 + ret <8 x i16> %vecins +} + +; The following tests insert one byte element at each constant index. We +; should always use 'vinsertb' (byte offset idx on BE, 15 - idx on LE). +define <16 x i8> @insert_byte_0(<16 x i8> %a, i8 %b) { +entry: +; CHECK-LABEL: insert_byte_0 +; CHECK: vinsertb 2, 3, 15 +; CHECK-BE-LABEL: insert_byte_0 +; CHECK-BE: vinsertb 2, 3, 0 + %vecins = insertelement <16 x i8> %a, i8 %b, i32 0 + ret <16 x i8> %vecins +} + +define <16 x i8> @insert_byte_1(<16 x i8> %a, i8 %b) { +entry: +; CHECK-LABEL: insert_byte_1 +; CHECK: vinsertb 2, 3, 14 +; CHECK-BE-LABEL: insert_byte_1 +; CHECK-BE: vinsertb 2, 3, 1 + %vecins = insertelement <16 x i8> %a, i8 %b, i32 1 + ret <16 x i8> %vecins +} + +define <16 x i8> @insert_byte_2(<16 x i8> %a, i8 %b) { +entry: +; CHECK-LABEL: insert_byte_2 +; CHECK: vinsertb 2, 3, 13 +; CHECK-BE-LABEL: insert_byte_2 +; CHECK-BE: vinsertb 2, 3, 2 + %vecins = insertelement <16 x i8> %a, i8 %b, i32 2 + ret <16 x i8> %vecins +} + +define <16 x i8> @insert_byte_3(<16 x i8> %a, i8 %b) { +entry: +; CHECK-LABEL: insert_byte_3 +; CHECK: vinsertb 2, 3, 12 +; CHECK-BE-LABEL: insert_byte_3 +; CHECK-BE: vinsertb 2, 3, 3 + %vecins = insertelement <16 x i8> %a, i8 %b, i32 3 + ret <16 x i8> %vecins +} + +define <16 x i8> @insert_byte_4(<16 x i8> %a, i8 %b) { +entry: +; CHECK-LABEL: insert_byte_4 +; CHECK: vinsertb 2, 3, 11 +; CHECK-BE-LABEL: 
insert_byte_4 +; CHECK-BE: vinsertb 2, 3, 4 + %vecins = insertelement <16 x i8> %a, i8 %b, i32 4 + ret <16 x i8> %vecins +} + +define <16 x i8> @insert_byte_5(<16 x i8> %a, i8 %b) { +entry: +; CHECK-LABEL: insert_byte_5 +; CHECK: vinsertb 2, 3, 10 +; CHECK-BE-LABEL: insert_byte_5 +; CHECK-BE: vinsertb 2, 3, 5 + %vecins = insertelement <16 x i8> %a, i8 %b, i32 5 + ret <16 x i8> %vecins +} + +define <16 x i8> @insert_byte_6(<16 x i8> %a, i8 %b) { +entry: +; CHECK-LABEL: insert_byte_6 +; CHECK: vinsertb 2, 3, 9 +; CHECK-BE-LABEL: insert_byte_6 +; CHECK-BE: vinsertb 2, 3, 6 + %vecins = insertelement <16 x i8> %a, i8 %b, i32 6 + ret <16 x i8> %vecins +} + +define <16 x i8> @insert_byte_7(<16 x i8> %a, i8 %b) { +entry: +; CHECK-LABEL: insert_byte_7 +; CHECK: vinsertb 2, 3, 8 +; CHECK-BE-LABEL: insert_byte_7 +; CHECK-BE: vinsertb 2, 3, 7 + %vecins = insertelement <16 x i8> %a, i8 %b, i32 7 + ret <16 x i8> %vecins +} + +define <16 x i8> @insert_byte_8(<16 x i8> %a, i8 %b) { +entry: +; CHECK-LABEL: insert_byte_8 +; CHECK: vinsertb 2, 3, 7 +; CHECK-BE-LABEL: insert_byte_8 +; CHECK-BE: vinsertb 2, 3, 8 + %vecins = insertelement <16 x i8> %a, i8 %b, i32 8 + ret <16 x i8> %vecins +} + +define <16 x i8> @insert_byte_9(<16 x i8> %a, i8 %b) { +entry: +; CHECK-LABEL: insert_byte_9 +; CHECK: vinsertb 2, 3, 6 +; CHECK-BE-LABEL: insert_byte_9 +; CHECK-BE: vinsertb 2, 3, 9 + %vecins = insertelement <16 x i8> %a, i8 %b, i32 9 + ret <16 x i8> %vecins +} + +define <16 x i8> @insert_byte_10(<16 x i8> %a, i8 %b) { +entry: +; CHECK-LABEL: insert_byte_10 +; CHECK: vinsertb 2, 3, 5 +; CHECK-BE-LABEL: insert_byte_10 +; CHECK-BE: vinsertb 2, 3, 10 + %vecins = insertelement <16 x i8> %a, i8 %b, i32 10 + ret <16 x i8> %vecins +} + +define <16 x i8> @insert_byte_11(<16 x i8> %a, i8 %b) { +entry: +; CHECK-LABEL: insert_byte_11 +; CHECK: vinsertb 2, 3, 4 +; CHECK-BE-LABEL: insert_byte_11 +; CHECK-BE: vinsertb 2, 3, 11 + %vecins = insertelement <16 x i8> %a, i8 %b, i32 11 + ret <16 x i8> %vecins +} + 
+define <16 x i8> @insert_byte_12(<16 x i8> %a, i8 %b) { +entry: +; CHECK-LABEL: insert_byte_12 +; CHECK: vinsertb 2, 3, 3 +; CHECK-BE-LABEL: insert_byte_12 +; CHECK-BE: vinsertb 2, 3, 12 + %vecins = insertelement <16 x i8> %a, i8 %b, i32 12 + ret <16 x i8> %vecins +} + +define <16 x i8> @insert_byte_13(<16 x i8> %a, i8 %b) { +entry: +; CHECK-LABEL: insert_byte_13 +; CHECK: vinsertb 2, 3, 2 +; CHECK-BE-LABEL: insert_byte_13 +; CHECK-BE: vinsertb 2, 3, 13 + %vecins = insertelement <16 x i8> %a, i8 %b, i32 13 + ret <16 x i8> %vecins +} + +define <16 x i8> @insert_byte_14(<16 x i8> %a, i8 %b) { +entry: +; CHECK-LABEL: insert_byte_14 +; CHECK: vinsertb 2, 3, 1 +; CHECK-BE-LABEL: insert_byte_14 +; CHECK-BE: vinsertb 2, 3, 14 + %vecins = insertelement <16 x i8> %a, i8 %b, i32 14 + ret <16 x i8> %vecins +} + +define <16 x i8> @insert_byte_15(<16 x i8> %a, i8 %b) { +entry: +; CHECK-LABEL: insert_byte_15 +; CHECK: vinsertb 2, 3, 0 +; CHECK-BE-LABEL: insert_byte_15 +; CHECK-BE: vinsertb 2, 3, 15 + %vecins = insertelement <16 x i8> %a, i8 %b, i32 15 + ret <16 x i8> %vecins +} + +; The following tests extract one halfword element from the vector. We should +; only be using the 'vextractuh' instruction when extracting elements [0,3] on +; LE and [4,7] on BE to avoid an xxswapd; the other elements must not use it. 
+define i16 @extract_halfword_0(<8 x i16> %a) { +entry: +; CHECK-LABEL: extract_halfword_0 +; CHECK: vextractuh 2, 2, 14 +; CHECK-BE-LABEL: extract_halfword_0 +; CHECK-BE-NOT: vextractuh + %vecext = extractelement <8 x i16> %a, i32 0 + ret i16 %vecext +} + +define i16 @extract_halfword_1(<8 x i16> %a) { +entry: +; CHECK-LABEL: extract_halfword_1 +; CHECK: vextractuh 2, 2, 12 +; CHECK-BE-LABEL: extract_halfword_1 +; CHECK-BE-NOT: vextractuh + %vecext = extractelement <8 x i16> %a, i32 1 + ret i16 %vecext +} + +define i16 @extract_halfword_2(<8 x i16> %a) { +entry: +; CHECK-LABEL: extract_halfword_2 +; CHECK: vextractuh 2, 2, 10 +; CHECK-BE-LABEL: extract_halfword_2 +; CHECK-BE-NOT: vextractuh + %vecext = extractelement <8 x i16> %a, i32 2 + ret i16 %vecext +} + +define i16 @extract_halfword_3(<8 x i16> %a) { +entry: +; CHECK-LABEL: extract_halfword_3 +; CHECK: vextractuh 2, 2, 8 +; CHECK-BE-LABEL: extract_halfword_3 +; CHECK-BE-NOT: vextractuh + %vecext = extractelement <8 x i16> %a, i32 3 + ret i16 %vecext +} + +define i16 @extract_halfword_4(<8 x i16> %a) { +entry: +; CHECK-LABEL: extract_halfword_4 +; CHECK-NOT: vextractuh +; CHECK-BE-LABEL: extract_halfword_4 +; CHECK-BE: vextractuh 2, 2, 8 + %vecext = extractelement <8 x i16> %a, i32 4 + ret i16 %vecext +} + +define i16 @extract_halfword_5(<8 x i16> %a) { +entry: +; CHECK-LABEL: extract_halfword_5 +; CHECK-NOT: vextractuh +; CHECK-BE-LABEL: extract_halfword_5 +; CHECK-BE: vextractuh 2, 2, 10 + %vecext = extractelement <8 x i16> %a, i32 5 + ret i16 %vecext +} + +define i16 @extract_halfword_6(<8 x i16> %a) { +entry: +; CHECK-LABEL: extract_halfword_6 +; CHECK-NOT: vextractuh +; CHECK-BE-LABEL: extract_halfword_6 +; CHECK-BE: vextractuh 2, 2, 12 + %vecext = extractelement <8 x i16> %a, i32 6 + ret i16 %vecext +} + +define i16 @extract_halfword_7(<8 x i16> %a) { +entry: +; CHECK-LABEL: extract_halfword_7 +; CHECK-NOT: vextractuh +; CHECK-BE-LABEL: extract_halfword_7 +; CHECK-BE: vextractuh 2, 2, 14 + %vecext = 
extractelement <8 x i16> %a, i32 7 + ret i16 %vecext +} + +; The following tests extract one byte element from the vector. We should only +; be using the 'vextractub' instruction when extracting elements [0,7] on LE +; and [8,15] on BE to avoid an xxswapd; the other elements must not use it. +define i8 @extract_byte_0(<16 x i8> %a) { +entry: +; CHECK-LABEL: extract_byte_0 +; CHECK: vextractub 2, 2, 15 +; CHECK-BE-LABEL: extract_byte_0 +; CHECK-BE-NOT: vextractub + %vecext = extractelement <16 x i8> %a, i32 0 + ret i8 %vecext +} + +define i8 @extract_byte_1(<16 x i8> %a) { +entry: +; CHECK-LABEL: extract_byte_1 +; CHECK: vextractub 2, 2, 14 +; CHECK-BE-LABEL: extract_byte_1 +; CHECK-BE-NOT: vextractub + %vecext = extractelement <16 x i8> %a, i32 1 + ret i8 %vecext +} + +define i8 @extract_byte_2(<16 x i8> %a) { +entry: +; CHECK-LABEL: extract_byte_2 +; CHECK: vextractub 2, 2, 13 +; CHECK-BE-LABEL: extract_byte_2 +; CHECK-BE-NOT: vextractub + %vecext = extractelement <16 x i8> %a, i32 2 + ret i8 %vecext +} + +define i8 @extract_byte_3(<16 x i8> %a) { +entry: +; CHECK-LABEL: extract_byte_3 +; CHECK: vextractub 2, 2, 12 +; CHECK-BE-LABEL: extract_byte_3 +; CHECK-BE-NOT: vextractub + %vecext = extractelement <16 x i8> %a, i32 3 + ret i8 %vecext +} + +define i8 @extract_byte_4(<16 x i8> %a) { +entry: +; CHECK-LABEL: extract_byte_4 +; CHECK: vextractub 2, 2, 11 +; CHECK-BE-LABEL: extract_byte_4 +; CHECK-BE-NOT: vextractub + %vecext = extractelement <16 x i8> %a, i32 4 + ret i8 %vecext +} + +define i8 @extract_byte_5(<16 x i8> %a) { +entry: +; CHECK-LABEL: extract_byte_5 +; CHECK: vextractub 2, 2, 10 +; CHECK-BE-LABEL: extract_byte_5 +; CHECK-BE-NOT: vextractub + %vecext = extractelement <16 x i8> %a, i32 5 + ret i8 %vecext +} + +define i8 @extract_byte_6(<16 x i8> %a) { +entry: +; CHECK-LABEL: extract_byte_6 +; CHECK: vextractub 2, 2, 9 +; CHECK-BE-LABEL: extract_byte_6 +; CHECK-BE-NOT: vextractub + %vecext = extractelement <16 x i8> %a, i32 6 + ret i8 %vecext +} + +define i8 @extract_byte_7(<16 x i8> 
%a) { +entry: +; CHECK-LABEL: extract_byte_7 +; CHECK: vextractub 2, 2, 8 +; CHECK-BE-LABEL: extract_byte_7 +; CHECK-BE-NOT: vextractub + %vecext = extractelement <16 x i8> %a, i32 7 + ret i8 %vecext +} + +define i8 @extract_byte_8(<16 x i8> %a) { +entry: +; CHECK-LABEL: extract_byte_8 +; CHECK-NOT: vextractub +; CHECK-BE-LABEL: extract_byte_8 +; CHECK-BE: vextractub 2, 2, 8 + %vecext = extractelement <16 x i8> %a, i32 8 + ret i8 %vecext +} + +define i8 @extract_byte_9(<16 x i8> %a) { +entry: +; CHECK-LABEL: extract_byte_9 +; CHECK-NOT: vextractub +; CHECK-BE-LABEL: extract_byte_9 +; CHECK-BE: vextractub 2, 2, 9 + %vecext = extractelement <16 x i8> %a, i32 9 + ret i8 %vecext +} + +define i8 @extract_byte_10(<16 x i8> %a) { +entry: +; CHECK-LABEL: extract_byte_10 +; CHECK-NOT: vextractub +; CHECK-BE-LABEL: extract_byte_10 +; CHECK-BE: vextractub 2, 2, 10 + %vecext = extractelement <16 x i8> %a, i32 10 + ret i8 %vecext +} + +define i8 @extract_byte_11(<16 x i8> %a) { +entry: +; CHECK-LABEL: extract_byte_11 +; CHECK-NOT: vextractub +; CHECK-BE-LABEL: extract_byte_11 +; CHECK-BE: vextractub 2, 2, 11 + %vecext = extractelement <16 x i8> %a, i32 11 + ret i8 %vecext +} + +define i8 @extract_byte_12(<16 x i8> %a) { +entry: +; CHECK-LABEL: extract_byte_12 +; CHECK-NOT: vextractub +; CHECK-BE-LABEL: extract_byte_12 +; CHECK-BE: vextractub 2, 2, 12 + %vecext = extractelement <16 x i8> %a, i32 12 + ret i8 %vecext +} + +define i8 @extract_byte_13(<16 x i8> %a) { +entry: +; CHECK-LABEL: extract_byte_13 +; CHECK-NOT: vextractub +; CHECK-BE-LABEL: extract_byte_13 +; CHECK-BE: vextractub 2, 2, 13 + %vecext = extractelement <16 x i8> %a, i32 13 + ret i8 %vecext +} + +define i8 @extract_byte_14(<16 x i8> %a) { +entry: +; CHECK-LABEL: extract_byte_14 +; CHECK-NOT: vextractub +; CHECK-BE-LABEL: extract_byte_14 +; CHECK-BE: vextractub 2, 2, 14 + %vecext = extractelement <16 x i8> %a, i32 14 + ret i8 %vecext +} + +define i8 @extract_byte_15(<16 x i8> %a) { +entry: +; CHECK-LABEL: 
extract_byte_15 +; CHECK-NOT: vextractub +; CHECK-BE-LABEL: extract_byte_15 +; CHECK-BE: vextractub 2, 2, 15 + %vecext = extractelement <16 x i8> %a, i32 15 + ret i8 %vecext +}