Index: lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- lib/Target/PowerPC/PPCISelLowering.h +++ lib/Target/PowerPC/PPCISelLowering.h @@ -65,6 +65,14 @@ /// XXSPLT, + /// XXINSERT - The PPC VSX insert instruction + /// (selected as xxinsertw). + XXINSERT, + + /// VECSHL - The PPC VSX shift left instruction + /// (selected as xxsldwi). + VECSHL, + /// The CMPB instruction (takes two operands of i32 or i64). CMPB, @@ -406,6 +414,16 @@ /// VSPLTB/VSPLTH/VSPLTW. bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize); + /// isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by + /// the XXINSERTW instruction introduced in ISA 3.0. This is essentially any + /// shuffle of v4f32/v4i32 vectors that just inserts one element from one + /// vector into the other. When it returns true, ShiftElts holds how many + /// elements the source vector needs to be shifted, InsertAtByte holds the + /// byte number the instruction needs to put the element in the desired + /// location of the target vector, and Swap says to exchange the operands. + bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, + unsigned &InsertAtByte, bool &Swap, bool IsLE); + /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. 
unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG); Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -665,6 +665,10 @@ addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass); addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass); } + if (Subtarget.hasP9Vector()) { + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal); + } } if (Subtarget.hasQPX()) { @@ -1017,6 +1021,8 @@ case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP"; case PPCISD::VPERM: return "PPCISD::VPERM"; case PPCISD::XXSPLT: return "PPCISD::XXSPLT"; + case PPCISD::XXINSERT: return "PPCISD::XXINSERT"; + case PPCISD::VECSHL: return "PPCISD::VECSHL"; case PPCISD::CMPB: return "PPCISD::CMPB"; case PPCISD::Hi: return "PPCISD::Hi"; case PPCISD::Lo: return "PPCISD::Lo"; @@ -1491,6 +1497,98 @@ return true; } +/// isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by +/// the XXINSERTW instruction introduced in ISA 3.0. This is essentially any +/// shuffle of v4f32/v4i32 vectors that just inserts one element from one vector +/// into the other. When it returns true, ShiftElts holds how many elements the +/// source vector needs to be shifted, InsertAtByte holds the byte number the +/// instruction needs to put the element in the desired location of the +/// target vector, and Swap says whether the two operands must be exchanged. 
+bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, + unsigned &InsertAtByte, bool &Swap, bool IsLE) { + + // Check that the mask is shuffling whole words (4 aligned, consecutive bytes) + for (unsigned i = 0; i < 4; ++i) { + unsigned B0 = N->getMaskElt(i*4); + unsigned B1 = N->getMaskElt(i*4+1); + unsigned B2 = N->getMaskElt(i*4+2); + unsigned B3 = N->getMaskElt(i*4+3); + if (B0 % 4) + return false; + if (B1 != B0+1 || B2 != B1+1 || B3 != B2+1) + return false; + } + + // Now we look at mask elements 0,4,8,12 and turn them into word indices + unsigned M0 = N->getMaskElt(0) / 4; + unsigned M1 = N->getMaskElt(4) / 4; + unsigned M2 = N->getMaskElt(8) / 4; + unsigned M3 = N->getMaskElt(12) / 4; + unsigned LittleEndianShifts[] = { 2, 1, 0, 3 }; + unsigned BigEndianShifts[] = { 3, 0, 1, 2 }; + + // Below, let H and L be arbitrary elements of the shuffle mask + // where H is in the range [4,7] and L is in the range [0,3]. + // H, 1, 2, 3 or L, 5, 6, 7 + if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) || + (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) { + ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3]; + InsertAtByte = IsLE ? 12 : 0; + Swap = M0 < 4; + return true; + } + // 0, H, 2, 3 or 4, L, 6, 7 + if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) || + (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) { + ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3]; + InsertAtByte = IsLE ? 8 : 4; + Swap = M1 < 4; + return true; + } + // 0, 1, H, 3 or 4, 5, L, 7 + if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) || + (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) { + ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3]; + InsertAtByte = IsLE ? 4 : 8; + Swap = M2 < 4; + return true; + } + // 0, 1, 2, H or 4, 5, 6, L + if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) || + (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) { + ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3]; + InsertAtByte = IsLE ?
0 : 12; + Swap = M3 < 4; + return true; + } + + // If both vector operands for the shuffle are the same vector, the mask will + // contain only elements from the first one and the second one will be undef. + if (N->getOperand(1).isUndef()) { + ShiftElts = 0; + Swap = false; + unsigned XXINSERTWSrcElem = IsLE ? 2 : 1; + if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) { + InsertAtByte = IsLE ? 12 : 0; + return true; + } + if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) { + InsertAtByte = IsLE ? 8 : 4; + return true; + } + if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) { + InsertAtByte = IsLE ? 4 : 8; + return true; + } + if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) { + InsertAtByte = IsLE ? 0 : 12; + return true; + } + } + + return false; +} + /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, @@ -7371,6 +7469,31 @@ EVT VT = Op.getValueType(); bool isLittleEndian = Subtarget.isLittleEndian(); + unsigned ShiftElts, InsertAtByte; + bool Swap; + if (Subtarget.hasP9Vector() && + PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap, + isLittleEndian)) { + // A single-input shuffle has an undef V2, so the word to insert must + // come from V1 itself rather than from the undef operand. + if (V2.isUndef()) + V2 = V1; + else if (Swap) + std::swap(V1, V2); + SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); + SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2); + if (ShiftElts) { + SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2, + DAG.getConstant(ShiftElts, dl, MVT::i32)); + SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Shl, + DAG.getConstant(InsertAtByte, dl, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); + } + SDValue Ins = DAG.getNode(PPCISD::XXINSERT, dl, MVT::v4i32, Conv1, Conv2, + DAG.getConstant(InsertAtByte, dl, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); + } + if (Subtarget.hasVSX()) { if (V2.isUndef() && 
PPC::isSplatShuffleMask(SVOp, 4)) { int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG); Index: lib/Target/PowerPC/PPCInstrInfo.td =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.td +++ lib/Target/PowerPC/PPCInstrInfo.td @@ -35,6 +35,14 @@ SDTCisVec<1>, SDTCisInt<2> ]>; +def SDT_PPCVecShift : SDTypeProfile<1, 3, [ SDTCisVec<0>, + SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3> +]>; + +def SDT_PPCVecInsert : SDTypeProfile<1, 3, [ SDTCisVec<0>, + SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3> +]>; + def SDT_PPCvcmp : SDTypeProfile<1, 3, [ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32> ]>; @@ -144,8 +152,10 @@ def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp>; def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>; -def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>; +def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>; def PPCxxsplt : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>; +def PPCxxinsert : SDNode<"PPCISD::XXINSERT", SDT_PPCVecInsert, []>; +def PPCvecshl : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>; def PPCqvfperm : SDNode<"PPCISD::QVFPERM", SDT_PPCqvfperm, []>; def PPCqvgpci : SDNode<"PPCISD::QVGPCI", SDT_PPCqvgpci, []>; Index: lib/Target/PowerPC/PPCInstrVSX.td =================================================================== --- lib/Target/PowerPC/PPCInstrVSX.td +++ lib/Target/PowerPC/PPCInstrVSX.td @@ -772,7 +772,9 @@ def XXSLDWI : XX3Form_2<60, 2, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$SHW), - "xxsldwi $XT, $XA, $XB, $SHW", IIC_VecPerm, []>; + "xxsldwi $XT, $XA, $XB, $SHW", IIC_VecPerm, + [(set v4i32:$XT, (PPCvecshl v4i32:$XA, v4i32:$XB, + imm32SExt16:$SHW))]>; def XXSPLTW : XX2Form_2<60, 164, (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM), "xxspltw $XT, $XB, $UIM", IIC_VecPerm, @@ -1801,9 +1803,14 @@ (f64 (MTVSRD $S))>; } +def AlignValues { + dag F32_TO_BE_WORD1 = (v4f32 (XXSLDWI (XSCVDPSPN $B), (XSCVDPSPN $B), 3)); + dag I32_TO_BE_WORD1 = 
(COPY_TO_REGCLASS (MTVSRWZ $B), VSRC); +} + // The following VSX instructions were introduced in Power ISA 3.0 def HasP9Vector : Predicate<"PPCSubTarget->hasP9Vector()">; -let Predicates = [HasP9Vector] in { +let AddedComplexity = 400, Predicates = [HasP9Vector] in { // [PO VRT XO VRB XO /] class X_VT5_XO5_VB5 opcode, bits<5> xo2, bits<10> xo, string opc, @@ -2010,13 +2017,17 @@ // Vector Insert Word // XB NOTE: Only XB.dword[1] is used, but we use vsrc on XB. - def XXINSERTW : XX2_RD6_UIM5_RS6<60, 181, - (outs vsrc:$XT), (ins u4imm:$UIMM, vsrc:$XB), - "xxinsertw $XT, $XB, $UIMM", IIC_VecFP, []>; + def XXINSERTW : + XX2_RD6_UIM5_RS6<60, 181, (outs vsrc:$XT), + (ins vsrc:$XTi, vsrc:$XB, u4imm:$UIM), + "xxinsertw $XT, $XB, $UIM", IIC_VecFP, + [(set v4i32:$XT, (PPCxxinsert v4i32:$XTi, v4i32:$XB, + imm32SExt16:$UIM))]>, + RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">; // Vector Extract Unsigned Word def XXEXTRACTUW : XX2_RD6_UIM5_RS6<60, 165, - (outs vsrc:$XT), (ins u4imm:$UIMM, vsrc:$XB), + (outs vsfrc:$XT), (ins vsrc:$XB, u4imm:$UIMM), "xxextractuw $XT, $XB, $UIMM", IIC_VecFP, []>; // Vector Insert Exponent DP/SP @@ -2155,4 +2166,60 @@ def STXVL : X_XS6_RA5_RB5<31, 397, "stxvl" , vsrc, []>; def STXVLL : X_XS6_RA5_RB5<31, 429, "stxvll" , vsrc, []>; } // end mayStore -} // end HasP9Vector + + // Patterns for which instructions from ISA 3.0 are a better match. A nested + // 'let Predicates' replaces the enclosing list, so HasP9Vector is restated. + let Predicates = [IsLittleEndian, HasP9Vector] in { + def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 0))))), + (f32 (XSCVUXDSP (XXEXTRACTUW $A, 12)))>; + def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 1))))), + (f32 (XSCVUXDSP (XXEXTRACTUW $A, 8)))>; + def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 2))))), + (f32 (XSCVUXDSP (XXEXTRACTUW $A, 4)))>; + def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 3))))), + (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>; + def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)), + (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1,
12))>; + def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)), + (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>; + def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)), + (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>; + def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)), + (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>; + def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)), + (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>; + def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)), + (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>; + def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)), + (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>; + def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)), + (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>; + } // IsLittleEndian, HasP9Vector + + let Predicates = [IsBigEndian, HasP9Vector] in { + def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 0))))), + (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>; + def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 1))))), + (f32 (XSCVUXDSP (XXEXTRACTUW $A, 4)))>; + def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 2))))), + (f32 (XSCVUXDSP (XXEXTRACTUW $A, 8)))>; + def : Pat<(f32 (PPCfcfidus (PPCmtvsrz (i32 (extractelt v4i32:$A, 3))))), + (f32 (XSCVUXDSP (XXEXTRACTUW $A, 12)))>; + def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)), + (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>; + def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)), + (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>; + def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)), + (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>; + def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)), + (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>; + def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)), + (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>; + def : Pat<(v4f32 (insertelt v4f32:$A,
f32:$B, 1)), + (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>; + def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)), + (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>; + def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)), + (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>; + } // IsBigEndian, HasP9Vector +} // end HasP9Vector, AddedComplexity Index: lib/Target/PowerPC/README_P9.txt =================================================================== --- lib/Target/PowerPC/README_P9.txt +++ lib/Target/PowerPC/README_P9.txt @@ -397,6 +397,8 @@ (set f128:$vT, (int_ppc_vsx_xsxsigqp f128$vB)) // xsxsigqp - Vector Insert Word: xxinsertw + - Useful for inserting f32/i32 elements into vectors (the element to be + inserted needs to be prepared) . Note: llvm has insertelem in "Vector Operations" ; yields > = insertelement > , , @@ -409,6 +411,10 @@ (set v1f128:$XT, (int_ppc_vsx_xxinsertw v1f128:$XTi, f128:$XB, i4:$UIMM)) - Vector Extract Unsigned Word: xxextractuw + - Not useful for extraction of f32 from v4f32 (the current pattern is better - + shift->convert) + - It is useful for (uint_to_fp (vector_extract v4i32, N)) + - Unfortunately, it can't be used for (sint_to_fp (vector_extract v4i32, N)) . 
Note: llvm has extractelement in "Vector Operations" ; yields = extractelement > , Index: test/CodeGen/p9-xxinsertw-xxextractuw.ll =================================================================== --- test/CodeGen/p9-xxinsertw-xxextractuw.ll +++ test/CodeGen/p9-xxinsertw-xxextractuw.ll @@ -0,0 +1,970 @@ +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-BE + +define <4 x float> @_Z7testInsILj0ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj0ELj0EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxinsertw 34, 0, 12 +; CHECK-BE-LABEL: _Z7testInsILj0ELj0EDv4_fET1_S1_S1_ +; CHECK-BE: xxsldwi 0, 35, 35, 3 +; CHECK-BE: xxinsertw 34, 0, 0 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj0ELj1EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj0ELj1EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 12 +; CHECK-BE-LABEL: _Z7testInsILj0ELj1EDv4_fET1_S1_S1_ +; CHECK-BE-NOT: xxsldwi +; CHECK-BE: xxinsertw 34, 35, 0 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj0ELj2EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj0ELj2EDv4_fET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 12 +; CHECK-BE-LABEL: _Z7testInsILj0ELj2EDv4_fET1_S1_S1_ +; CHECK-BE: xxsldwi 0, 35, 35, 1 +; CHECK-BE: xxinsertw 34, 0, 0 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj0ELj3EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj0ELj3EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 
35, 35, 3 +; CHECK: xxinsertw 34, 0, 12 +; CHECK-BE-LABEL: _Z7testInsILj0ELj3EDv4_fET1_S1_S1_ +; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxinsertw 34, 0, 0 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj1ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj0EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxinsertw 34, 0, 8 +; CHECK-BE-LABEL: _Z7testInsILj1ELj0EDv4_fET1_S1_S1_ +; CHECK-BE: xxsldwi 0, 35, 35, 3 +; CHECK-BE: xxinsertw 34, 0, 4 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj1ELj1EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj1EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 8 +; CHECK-BE-LABEL: _Z7testInsILj1ELj1EDv4_fET1_S1_S1_ +; CHECK-BE-NOT: xxsldwi +; CHECK-BE: xxinsertw 34, 35, 4 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj1ELj2EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj2EDv4_fET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 8 +; CHECK-BE-LABEL: _Z7testInsILj1ELj2EDv4_fET1_S1_S1_ +; CHECK-BE: xxsldwi 0, 35, 35, 1 +; CHECK-BE: xxinsertw 34, 0, 4 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj1ELj3EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj3EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 8 +; CHECK-BE-LABEL: _Z7testInsILj1ELj3EDv4_fET1_S1_S1_ +; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxinsertw 34, 0, 4 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> 
@_Z7testInsILj2ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj0EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxinsertw 34, 0, 4 +; CHECK-BE-LABEL: _Z7testInsILj2ELj0EDv4_fET1_S1_S1_ +; CHECK-BE: xxsldwi 0, 35, 35, 3 +; CHECK-BE: xxinsertw 34, 0, 8 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj2ELj1EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj1EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 4 +; CHECK-BE-LABEL: _Z7testInsILj2ELj1EDv4_fET1_S1_S1_ +; CHECK-BE-NOT: xxsldwi +; CHECK-BE: xxinsertw 34, 35, 8 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj2ELj2EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj2EDv4_fET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 4 +; CHECK-BE-LABEL: _Z7testInsILj2ELj2EDv4_fET1_S1_S1_ +; CHECK-BE: xxsldwi 0, 35, 35, 1 +; CHECK-BE: xxinsertw 34, 0, 8 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj2ELj3EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj3EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 4 +; CHECK-BE-LABEL: _Z7testInsILj2ELj3EDv4_fET1_S1_S1_ +; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxinsertw 34, 0, 8 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj3ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj0EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxinsertw 34, 0, 0 +; CHECK-BE-LABEL: _Z7testInsILj3ELj0EDv4_fET1_S1_S1_ +; CHECK-BE: xxsldwi 0, 35, 35, 3 +; CHECK-BE: xxinsertw 34, 
0, 12 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj3ELj1EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj1EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 0 +; CHECK-BE-LABEL: _Z7testInsILj3ELj1EDv4_fET1_S1_S1_ +; CHECK-BE-NOT: xxsldwi +; CHECK-BE: xxinsertw 34, 35, 12 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj3ELj2EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj2EDv4_fET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 0 +; CHECK-BE-LABEL: _Z7testInsILj3ELj2EDv4_fET1_S1_S1_ +; CHECK-BE: xxsldwi 0, 35, 35, 1 +; CHECK-BE: xxinsertw 34, 0, 12 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj3ELj3EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj3EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 0 +; CHECK-BE-LABEL: _Z7testInsILj3ELj3EDv4_fET1_S1_S1_ +; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxinsertw 34, 0, 12 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x i32> @_Z7testInsILj0ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj0ELj0EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxinsertw 34, 0, 12 +; CHECK-BE-LABEL: _Z7testInsILj0ELj0EDv4_jET1_S1_S1_ +; CHECK-BE: xxsldwi 0, 35, 35, 3 +; CHECK-BE: xxinsertw 34, 0, 0 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj0ELj1EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj0ELj1EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 12 +; 
CHECK-BE-LABEL: _Z7testInsILj0ELj1EDv4_jET1_S1_S1_ +; CHECK-BE-NOT: xxsldwi +; CHECK-BE: xxinsertw 34, 35, 0 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj0ELj2EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj0ELj2EDv4_jET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 12 +; CHECK-BE-LABEL: _Z7testInsILj0ELj2EDv4_jET1_S1_S1_ +; CHECK-BE: xxsldwi 0, 35, 35, 1 +; CHECK-BE: xxinsertw 34, 0, 0 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj0ELj3EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj0ELj3EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 12 +; CHECK-BE-LABEL: _Z7testInsILj0ELj3EDv4_jET1_S1_S1_ +; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxinsertw 34, 0, 0 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj1ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj0EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxinsertw 34, 0, 8 +; CHECK-BE-LABEL: _Z7testInsILj1ELj0EDv4_jET1_S1_S1_ +; CHECK-BE: xxsldwi 0, 35, 35, 3 +; CHECK-BE: xxinsertw 34, 0, 4 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj1ELj1EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj1EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 8 +; CHECK-BE-LABEL: _Z7testInsILj1ELj1EDv4_jET1_S1_S1_ +; CHECK-BE-NOT: xxsldwi +; CHECK-BE: xxinsertw 34, 35, 4 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj1ELj2EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj2EDv4_jET1_S1_S1_ +; 
CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 8 +; CHECK-BE-LABEL: _Z7testInsILj1ELj2EDv4_jET1_S1_S1_ +; CHECK-BE: xxsldwi 0, 35, 35, 1 +; CHECK-BE: xxinsertw 34, 0, 4 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj1ELj3EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj3EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 8 +; CHECK-BE-LABEL: _Z7testInsILj1ELj3EDv4_jET1_S1_S1_ +; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxinsertw 34, 0, 4 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj2ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj0EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxinsertw 34, 0, 4 +; CHECK-BE-LABEL: _Z7testInsILj2ELj0EDv4_jET1_S1_S1_ +; CHECK-BE: xxsldwi 0, 35, 35, 3 +; CHECK-BE: xxinsertw 34, 0, 8 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj2ELj1EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj1EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 4 +; CHECK-BE-LABEL: _Z7testInsILj2ELj1EDv4_jET1_S1_S1_ +; CHECK-BE-NOT: xxsldwi +; CHECK-BE: xxinsertw 34, 35, 8 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj2ELj2EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj2EDv4_jET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 4 +; CHECK-BE-LABEL: _Z7testInsILj2ELj2EDv4_jET1_S1_S1_ +; CHECK-BE: xxsldwi 0, 35, 35, 1 +; CHECK-BE: xxinsertw 34, 0, 8 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj2ELj3EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { 
+entry: +; CHECK-LABEL: _Z7testInsILj2ELj3EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 4 +; CHECK-BE-LABEL: _Z7testInsILj2ELj3EDv4_jET1_S1_S1_ +; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxinsertw 34, 0, 8 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj3ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj0EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 2 +; CHECK: xxinsertw 34, 0, 0 +; CHECK-BE-LABEL: _Z7testInsILj3ELj0EDv4_jET1_S1_S1_ +; CHECK-BE: xxsldwi 0, 35, 35, 3 +; CHECK-BE: xxinsertw 34, 0, 12 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj3ELj1EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj1EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 0 +; CHECK-BE-LABEL: _Z7testInsILj3ELj1EDv4_jET1_S1_S1_ +; CHECK-BE-NOT: xxsldwi +; CHECK-BE: xxinsertw 34, 35, 12 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj3ELj2EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj2EDv4_jET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 0 +; CHECK-BE-LABEL: _Z7testInsILj3ELj2EDv4_jET1_S1_S1_ +; CHECK-BE: xxsldwi 0, 35, 35, 1 +; CHECK-BE: xxinsertw 34, 0, 12 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj3ELj3EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj3EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 0 +; CHECK-BE-LABEL: _Z7testInsILj3ELj3EDv4_jET1_S1_S1_ +; CHECK-BE: xxsldwi 0, 35, 35, 2 +; CHECK-BE: xxinsertw 34, 0, 12 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define float 
@_Z13testUiToFpExtILj0EEfDv4_j(<4 x i32> %a) { +entry: +; CHECK-LABEL: _Z13testUiToFpExtILj0EEfDv4_j +; CHECK: xxextractuw 0, 34, 12 +; CHECK: xscvuxdsp 1, 0 +; CHECK-BE-LABEL: _Z13testUiToFpExtILj0EEfDv4_j +; CHECK-BE: xxextractuw 0, 34, 0 +; CHECK-BE: xscvuxdsp 1, 0 + %vecext = extractelement <4 x i32> %a, i32 0 + %conv = uitofp i32 %vecext to float + ret float %conv +} + +define float @_Z13testUiToFpExtILj1EEfDv4_j(<4 x i32> %a) { +entry: +; CHECK-LABEL: _Z13testUiToFpExtILj1EEfDv4_j +; CHECK: xxextractuw 0, 34, 8 +; CHECK: xscvuxdsp 1, 0 +; CHECK-BE-LABEL: _Z13testUiToFpExtILj1EEfDv4_j +; CHECK-BE: xxextractuw 0, 34, 4 +; CHECK-BE: xscvuxdsp 1, 0 + %vecext = extractelement <4 x i32> %a, i32 1 + %conv = uitofp i32 %vecext to float + ret float %conv +} + +define float @_Z13testUiToFpExtILj2EEfDv4_j(<4 x i32> %a) { +entry: +; CHECK-LABEL: _Z13testUiToFpExtILj2EEfDv4_j +; CHECK: xxextractuw 0, 34, 4 +; CHECK: xscvuxdsp 1, 0 +; CHECK-BE-LABEL: _Z13testUiToFpExtILj2EEfDv4_j +; CHECK-BE: xxextractuw 0, 34, 8 +; CHECK-BE: xscvuxdsp 1, 0 + %vecext = extractelement <4 x i32> %a, i32 2 + %conv = uitofp i32 %vecext to float + ret float %conv +} + +define float @_Z13testUiToFpExtILj3EEfDv4_j(<4 x i32> %a) { +entry: +; CHECK-LABEL: _Z13testUiToFpExtILj3EEfDv4_j +; CHECK: xxextractuw 0, 34, 0 +; CHECK: xscvuxdsp 1, 0 +; CHECK-BE-LABEL: _Z13testUiToFpExtILj3EEfDv4_j +; CHECK-BE: xxextractuw 0, 34, 12 +; CHECK-BE: xscvuxdsp 1, 0 + %vecext = extractelement <4 x i32> %a, i32 3 + %conv = uitofp i32 %vecext to float + ret float %conv +} + +define <4 x float> @_Z10testInsEltILj0EDv4_ffET0_S1_T1_(<4 x float> %a, float %b) { +entry: +; CHECK-LABEL: _Z10testInsEltILj0EDv4_ffET0_S1_T1_ +; CHECK: xscvdpspn 0, 1 +; CHECK: xxsldwi 0, 0, 0, 3 +; CHECK: xxinsertw 34, 0, 12 +; CHECK-BE-LABEL: _Z10testInsEltILj0EDv4_ffET0_S1_T1_ +; CHECK-BE: xscvdpspn 0, 1 +; CHECK-BE: xxsldwi 0, 0, 0, 3 +; CHECK-BE: xxinsertw 34, 0, 0 + %vecins = insertelement <4 x float> %a, float %b, i32 0 + ret <4 x float> 
%vecins +} + +define <4 x float> @_Z10testInsEltILj1EDv4_ffET0_S1_T1_(<4 x float> %a, float %b) { +entry: +; CHECK-LABEL: _Z10testInsEltILj1EDv4_ffET0_S1_T1_ +; CHECK: xscvdpspn 0, 1 +; CHECK: xxsldwi 0, 0, 0, 3 +; CHECK: xxinsertw 34, 0, 8 +; CHECK-BE-LABEL: _Z10testInsEltILj1EDv4_ffET0_S1_T1_ +; CHECK-BE: xscvdpspn 0, 1 +; CHECK-BE: xxsldwi 0, 0, 0, 3 +; CHECK-BE: xxinsertw 34, 0, 4 + %vecins = insertelement <4 x float> %a, float %b, i32 1 + ret <4 x float> %vecins +} + +define <4 x float> @_Z10testInsEltILj2EDv4_ffET0_S1_T1_(<4 x float> %a, float %b) { +entry: +; CHECK-LABEL: _Z10testInsEltILj2EDv4_ffET0_S1_T1_ +; CHECK: xscvdpspn 0, 1 +; CHECK: xxsldwi 0, 0, 0, 3 +; CHECK: xxinsertw 34, 0, 4 +; CHECK-BE-LABEL: _Z10testInsEltILj2EDv4_ffET0_S1_T1_ +; CHECK-BE: xscvdpspn 0, 1 +; CHECK-BE: xxsldwi 0, 0, 0, 3 +; CHECK-BE: xxinsertw 34, 0, 8 + %vecins = insertelement <4 x float> %a, float %b, i32 2 + ret <4 x float> %vecins +} + +define <4 x float> @_Z10testInsEltILj3EDv4_ffET0_S1_T1_(<4 x float> %a, float %b) { +entry: +; CHECK-LABEL: _Z10testInsEltILj3EDv4_ffET0_S1_T1_ +; CHECK: xscvdpspn 0, 1 +; CHECK: xxsldwi 0, 0, 0, 3 +; CHECK: xxinsertw 34, 0, 0 +; CHECK-BE-LABEL: _Z10testInsEltILj3EDv4_ffET0_S1_T1_ +; CHECK-BE: xscvdpspn 0, 1 +; CHECK-BE: xxsldwi 0, 0, 0, 3 +; CHECK-BE: xxinsertw 34, 0, 12 + %vecins = insertelement <4 x float> %a, float %b, i32 3 + ret <4 x float> %vecins +} + +define <4 x i32> @_Z10testInsEltILj0EDv4_jjET0_S1_T1_(<4 x i32> %a, i32 zeroext %b) { +entry: +; CHECK-LABEL: _Z10testInsEltILj0EDv4_jjET0_S1_T1_ +; CHECK: mtvsrwz 0, 5 +; CHECK: xxinsertw 34, 0, 12 +; CHECK-BE-LABEL: _Z10testInsEltILj0EDv4_jjET0_S1_T1_ +; CHECK-BE: mtvsrwz 0, 5 +; CHECK-BE: xxinsertw 34, 0, 0 + %vecins = insertelement <4 x i32> %a, i32 %b, i32 0 + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z10testInsEltILj1EDv4_jjET0_S1_T1_(<4 x i32> %a, i32 zeroext %b) { +entry: +; CHECK-LABEL: _Z10testInsEltILj1EDv4_jjET0_S1_T1_ +; CHECK: mtvsrwz 0, 5 +; CHECK: xxinsertw 34, 0, 
8
+; CHECK-BE-LABEL: _Z10testInsEltILj1EDv4_jjET0_S1_T1_
+; CHECK-BE: mtvsrwz 0, 5
+; CHECK-BE: xxinsertw 34, 0, 4
+  %vecins = insertelement <4 x i32> %a, i32 %b, i32 1
+  ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z10testInsEltILj2EDv4_jjET0_S1_T1_(<4 x i32> %a, i32 zeroext %b) {
+entry:
+; CHECK-LABEL: _Z10testInsEltILj2EDv4_jjET0_S1_T1_
+; CHECK: mtvsrwz 0, 5
+; CHECK: xxinsertw 34, 0, 4
+; CHECK-BE-LABEL: _Z10testInsEltILj2EDv4_jjET0_S1_T1_
+; CHECK-BE: mtvsrwz 0, 5
+; CHECK-BE: xxinsertw 34, 0, 8
+  %vecins = insertelement <4 x i32> %a, i32 %b, i32 2
+  ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z10testInsEltILj3EDv4_jjET0_S1_T1_(<4 x i32> %a, i32 zeroext %b) {
+entry:
+; CHECK-LABEL: _Z10testInsEltILj3EDv4_jjET0_S1_T1_
+; CHECK: mtvsrwz 0, 5
+; CHECK: xxinsertw 34, 0, 0
+; CHECK-BE-LABEL: _Z10testInsEltILj3EDv4_jjET0_S1_T1_
+; CHECK-BE: mtvsrwz 0, 5
+; CHECK-BE: xxinsertw 34, 0, 12
+  %vecins = insertelement <4 x i32> %a, i32 %b, i32 3
+  ret <4 x i32> %vecins
+}
+
+; testIns<To, From> "r" variants: %b is the first shuffle operand, so mask
+; entries 0-3 select lanes of %b and entries 4-7 select lanes of %a. Every
+; mask keeps %a in all lanes except lane To, which receives %b[From].
+; NOTE(review): the mask operands were missing from this copy of the patch;
+; they are reconstructed from the function names together with the expected
+; shift amounts and insert byte offsets below - confirm against the original.
+define <4 x float> @_Z7testInsILj0ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj0EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj0EDv4_fET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 0
+  %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+  ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj0ELj1EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj1EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj1EDv4_fET1_S1_S1_
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 0
+  %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 1, i32 5, i32 6, i32 7>
+  ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj0ELj2EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj2EDv4_fET1_S1_S1_
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj2EDv4_fET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 0
+  %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 2, i32 5, i32 6, i32 7>
+  ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj0ELj3EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj3EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj3EDv4_fET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 0
+  %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 3, i32 5, i32 6, i32 7>
+  ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj1ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj0EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj0EDv4_fET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 4
+  %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 0, i32 6, i32 7>
+  ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj1ELj1EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj1EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj1EDv4_fET1_S1_S1_
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 4
+  %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
+  ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj1ELj2EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj2EDv4_fET1_S1_S1_
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj2EDv4_fET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 4
+  %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 2, i32 6, i32 7>
+  ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj1ELj3EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj3EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj3EDv4_fET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 4
+  %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 3, i32 6, i32 7>
+  ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj2ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj0EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj0EDv4_fET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 8
+  %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 0, i32 7>
+  ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj2ELj1EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj1EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj1EDv4_fET1_S1_S1_
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 8
+  %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 1, i32 7>
+  ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj2ELj2EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj2EDv4_fET1_S1_S1_
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj2EDv4_fET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 8
+  %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
+  ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj2ELj3EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj3EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj3EDv4_fET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 8
+  %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 3, i32 7>
+  ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj3ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj0EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj0EDv4_fET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 12
+  %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 0>
+  ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj3ELj1EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj1EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj1EDv4_fET1_S1_S1_
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 12
+  %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 1>
+  ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj3ELj2EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj2EDv4_fET1_S1_S1_
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj2EDv4_fET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 12
+  %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 2>
+  ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj3ELj3EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj3EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj3EDv4_fET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 12
+  %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
+  ret <4 x float> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj0ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj0EDv4_jET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj0EDv4_jET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 0
+  %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+  ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj0ELj1EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj1EDv4_jET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj1EDv4_jET1_S1_S1_
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 0
+  %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 1, i32 5, i32 6, i32 7>
+  ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj0ELj2EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj2EDv4_jET1_S1_S1_
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj2EDv4_jET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 0
+  %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 2, i32 5, i32 6, i32 7>
+  ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj0ELj3EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj3EDv4_jET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 12
+; CHECK-BE-LABEL: _Z7testInsILj0ELj3EDv4_jET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 0
+  %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 3, i32 5, i32 6, i32 7>
+  ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj1ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj0EDv4_jET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj0EDv4_jET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 4
+  %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 0, i32 6, i32 7>
+  ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj1ELj1EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj1EDv4_jET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj1EDv4_jET1_S1_S1_
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 4
+  %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
+  ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj1ELj2EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj2EDv4_jET1_S1_S1_
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj2EDv4_jET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 4
+  %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 2, i32 6, i32 7>
+  ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj1ELj3EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj3EDv4_jET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 8
+; CHECK-BE-LABEL: _Z7testInsILj1ELj3EDv4_jET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 4
+  %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 3, i32 6, i32 7>
+  ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj2ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj0EDv4_jET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj0EDv4_jET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 8
+  %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 0, i32 7>
+  ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj2ELj1EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj1EDv4_jET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj1EDv4_jET1_S1_S1_
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 8
+  %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 1, i32 7>
+  ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj2ELj2EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj2EDv4_jET1_S1_S1_
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj2EDv4_jET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 8
+  %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
+  ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj2ELj3EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj2ELj3EDv4_jET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 4
+; CHECK-BE-LABEL: _Z7testInsILj2ELj3EDv4_jET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 8
+  %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 3, i32 7>
+  ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj3ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj0EDv4_jET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 2
+; CHECK: xxinsertw 34, 0, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj0EDv4_jET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 3
+; CHECK-BE: xxinsertw 34, 0, 12
+  %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 0>
+  ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj3ELj1EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj1EDv4_jET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj1EDv4_jET1_S1_S1_
+; CHECK-BE-NOT: xxsldwi
+; CHECK-BE: xxinsertw 34, 35, 12
+  %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 1>
+  ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj3ELj2EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj2EDv4_jET1_S1_S1_
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj2EDv4_jET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 1
+; CHECK-BE: xxinsertw 34, 0, 12
+  %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 2>
+  ret <4 x i32> %vecins
+}
+
+define <4 x i32> @_Z7testInsILj3ELj3EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj3ELj3EDv4_jET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 0
+; CHECK-BE-LABEL: _Z7testInsILj3ELj3EDv4_jET1_S1_S1_
+; CHECK-BE: xxsldwi 0, 35, 35, 2
+; CHECK-BE: xxinsertw 34, 0, 12
+  %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
+  ret <4 x i32> %vecins
+}
+; Same-vector inserts: both shuffle operands are %a, so one lane of %a is
+; copied into another lane of the same vector.
+; NOTE(review): the mask operands were missing from this copy of the patch.
+; The destination lane follows from the expected insert byte offset; the
+; source lane (1 for the BE tests, 2 for the LE tests) is presumed from the
+; absence of El1BE and El2LE cases - confirm against the original test.
+define <4 x float> @testSameVecEl0BE(<4 x float> %a) {
+entry:
+; CHECK-BE-LABEL: testSameVecEl0BE
+; CHECK-BE: xxinsertw 34, 0, 0
+  %vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 5, i32 1, i32 2, i32 3>
+  ret <4 x float> %vecins
+}
+define <4 x float> @testSameVecEl2BE(<4 x float> %a) {
+entry:
+; CHECK-BE-LABEL: testSameVecEl2BE
+; CHECK-BE: xxinsertw 34, 0, 8
+  %vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 5, i32 3>
+  ret <4 x float> %vecins
+}
+define <4 x float> @testSameVecEl3BE(<4 x float> %a) {
+entry:
+; CHECK-BE-LABEL: testSameVecEl3BE
+; CHECK-BE: xxinsertw 34, 0, 12
+  %vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
+  ret <4 x float> %vecins
+}
+define <4 x float> @testSameVecEl0LE(<4 x float> %a) {
+entry:
+; CHECK-LABEL: testSameVecEl0LE
+; CHECK: xxinsertw 34, 0, 12
+  %vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 6, i32 1, i32 2, i32 3>
+  ret <4 x float> %vecins
+}
+define <4 x float> @testSameVecEl1LE(<4 x float> %a) {
+entry:
+; CHECK-LABEL: testSameVecEl1LE
+; CHECK: xxinsertw 34, 0, 8
+  %vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
+  ret <4 x float> %vecins
+}
+define <4 x float> @testSameVecEl3LE(<4 x float> %a) {
+entry:
+; CHECK-LABEL: testSameVecEl3LE
+; CHECK: xxinsertw 34, 0, 0
+  %vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
+  ret <4 x float> %vecins
+}