Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -3694,6 +3694,7 @@ return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1, DAG.getConstant(Lane, MVT::i64)); } + // For shuffle mask like "0, 1, 2, 3, 4, 5, 13, 7", try to generate insert // by element from V2 to V1 . // If shuffle mask is like "0, 1, 10, 11, 12, 13, 14, 15", V2 would be a @@ -3736,12 +3737,10 @@ InsV = DAG.getNode(ISD::UNDEF, dl, VT); } - SDValue PassN; - for (int I = 0, E = InsMasks.size(); I != E; ++I) { SDValue ExtV = V1; int Mask = InsMasks[I]; - if (Mask > V1EltNum) { + if (Mask >= V1EltNum) { ExtV = V2; Mask -= V1EltNum; } @@ -3754,12 +3753,12 @@ else EltVT = (EltSize == 64)? MVT::i64: MVT::i32; - PassN = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, ExtV, - DAG.getConstant(Mask, MVT::i64)); - PassN = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, InsV, PassN, - DAG.getConstant(InsIndex[I], MVT::i64)); + ExtV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, ExtV, + DAG.getConstant(Mask, MVT::i64)); + InsV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, InsV, ExtV, + DAG.getConstant(InsIndex[I], MVT::i64)); } - return PassN; + return InsV; } return SDValue(); Index: lib/Target/AArch64/AArch64InstrFormats.td =================================================================== --- lib/Target/AArch64/AArch64InstrFormats.td +++ lib/Target/AArch64/AArch64InstrFormats.td @@ -971,6 +971,24 @@ : InstAlias { } +// Format AdvSIMD perm +class NeonI_Perm size, bits<3> opcode, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdnm { + let Inst{31} = 0b0; + let Inst{30} = q; + let Inst{29-24} = 0b001110; + let Inst{23-22} = size; + let Inst{21} = 0b0; + // Inherit Rm in 20-16 + let Inst{15} = 0b0; + let Inst{14-12} = opcode; + let Inst{11-10} = 0b10; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + // Format 
AdvSIMD 3 vector registers with same vector type class NeonI_3VSame size, bits<5> opcode, dag outs, dag ins, string asmstr, @@ -1163,8 +1181,7 @@ class NeonI_2VAcross size, bits<5> opcode, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64InstRdn -{ + : A64InstRdn { let Inst{31} = 0b0; let Inst{30} = q; let Inst{29} = u; @@ -1198,8 +1215,7 @@ class NeonI_LdStMult opcode, bits<2> size, dag outs, dag ins, string asmstr, list patterns, InstrItinClass itin> - : A64InstRtn -{ + : A64InstRtn { let Inst{31} = 0b0; let Inst{30} = q; let Inst{29-23} = 0b0011000; Index: lib/Target/AArch64/AArch64InstrNEON.td =================================================================== --- lib/Target/AArch64/AArch64InstrNEON.td +++ lib/Target/AArch64/AArch64InstrNEON.td @@ -2335,6 +2335,335 @@ defm FMINV : NeonI_2VAcross_3<0b1, 0b01111, 0b10, "fminv", int_aarch64_neon_vminv>; +// The followings are for instruction class (Perm) + +class NeonI_Permute size, bits<3> opcode, + string asmop, RegisterOperand OpVPR, string OpS> + : NeonI_Perm; + +multiclass NeonI_Perm_pat opcode, string asmop> { + def _8b : NeonI_Permute<0b0, 0b00, opcode, asmop, VPR64, "8b">; + def _16b : NeonI_Permute<0b1, 0b00, opcode, asmop, VPR128, "16b">; + def _4h : NeonI_Permute<0b0, 0b01, opcode, asmop, VPR64, "4h">; + def _8h : NeonI_Permute<0b1, 0b01, opcode, asmop, VPR128, "8h">; + def _2s : NeonI_Permute<0b0, 0b10, opcode, asmop, VPR64, "2s">; + def _4s : NeonI_Permute<0b1, 0b10, opcode, asmop, VPR128, "4s">; + def _2d : NeonI_Permute<0b1, 0b11, opcode, asmop, VPR128, "2d">; +} + +defm UZP1vvv : NeonI_Perm_pat<0b001, "uzp1">; +defm TRN1vvv : NeonI_Perm_pat<0b010, "trn1">; +defm ZIP1vvv : NeonI_Perm_pat<0b011, "zip1">; +defm UZP2vvv : NeonI_Perm_pat<0b101, "uzp2">; +defm TRN2vvv : NeonI_Perm_pat<0b110, "trn2">; +defm ZIP2vvv : NeonI_Perm_pat<0b111, "zip2">; + +// Extract and Insert +def NI_ei_i32 : PatFrag<(ops node:$Rn, node:$Rm, node:$Ext, node:$Ins), + (vector_insert node:$Rn, + (i32 
(vector_extract node:$Rm, node:$Ext)), + node:$Ins)>; + +def NI_ei_f32 : PatFrag<(ops node:$Rn, node:$Rm, node:$Ext, node:$Ins), + (vector_insert node:$Rn, + (f32 (vector_extract node:$Rm, node:$Ext)), + node:$Ins)>; + +// uzp1 +def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 + (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 + (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 + (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 + (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 + (v16i8 VPR128:$Rn), + (v16i8 VPR128:$Rn), 2, 1)), + (v16i8 VPR128:$Rn), 4, 2)), + (v16i8 VPR128:$Rn), 6, 3)), + (v16i8 VPR128:$Rn), 8, 4)), + (v16i8 VPR128:$Rn), 10, 5)), + (v16i8 VPR128:$Rn), 12, 6)), + (v16i8 VPR128:$Rn), 14, 7)), + (v16i8 VPR128:$Rm), 0, 8)), + (v16i8 VPR128:$Rm), 2, 9)), + (v16i8 VPR128:$Rm), 4, 10)), + (v16i8 VPR128:$Rm), 6, 11)), + (v16i8 VPR128:$Rm), 8, 12)), + (v16i8 VPR128:$Rm), 10, 13)), + (v16i8 VPR128:$Rm), 12, 14)), + (v16i8 VPR128:$Rm), 14, 15)), + (UZP1vvv_16b VPR128:$Rn, VPR128:$Rm)>; + +class NI_Uzp1_v8 + : Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 + (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 + (Ty VPR:$Rn), + (Ty VPR:$Rn), 2, 1)), + (Ty VPR:$Rn), 4, 2)), + (Ty VPR:$Rn), 6, 3)), + (Ty VPR:$Rm), 0, 4)), + (Ty VPR:$Rm), 2, 5)), + (Ty VPR:$Rm), 4, 6)), + (Ty VPR:$Rm), 6, 7)), + (INST VPR:$Rn, VPR:$Rm)>; + +def : NI_Uzp1_v8; +def : NI_Uzp1_v8; + +class NI_Uzp1_v4 + : Pat<(Ty (ei (Ty (ei (Ty (ei + (Ty VPR:$Rn), + (Ty VPR:$Rn), 2, 1)), + (Ty VPR:$Rm), 0, 2)), + (Ty VPR:$Rm), 2, 3)), + (INST VPR:$Rn, VPR:$Rm)>; + +def : NI_Uzp1_v4; +def : NI_Uzp1_v4; +def : NI_Uzp1_v4; + +// uzp2 +def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 + (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 + (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 + (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 + (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 + (v16i8 VPR128:$Rm), + (v16i8 VPR128:$Rn), 
1, 0)), + (v16i8 VPR128:$Rn), 3, 1)), + (v16i8 VPR128:$Rn), 5, 2)), + (v16i8 VPR128:$Rn), 7, 3)), + (v16i8 VPR128:$Rn), 9, 4)), + (v16i8 VPR128:$Rn), 11, 5)), + (v16i8 VPR128:$Rn), 13, 6)), + (v16i8 VPR128:$Rn), 15, 7)), + (v16i8 VPR128:$Rm), 1, 8)), + (v16i8 VPR128:$Rm), 3, 9)), + (v16i8 VPR128:$Rm), 5, 10)), + (v16i8 VPR128:$Rm), 7, 11)), + (v16i8 VPR128:$Rm), 9, 12)), + (v16i8 VPR128:$Rm), 11, 13)), + (v16i8 VPR128:$Rm), 13, 14)), + (UZP2vvv_16b VPR128:$Rn, VPR128:$Rm)>; + +class NI_Uzp2_v8 + : Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 + (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 + (Ty VPR:$Rm), + (Ty VPR:$Rn), 1, 0)), + (Ty VPR:$Rn), 3, 1)), + (Ty VPR:$Rn), 5, 2)), + (Ty VPR:$Rn), 7, 3)), + (Ty VPR:$Rm), 1, 4)), + (Ty VPR:$Rm), 3, 5)), + (Ty VPR:$Rm), 5, 6)), + (INST VPR:$Rn, VPR:$Rm)>; + +def : NI_Uzp2_v8; +def : NI_Uzp2_v8; + +class NI_Uzp2_v4 + : Pat<(Ty (ei (Ty (ei (Ty (ei + (Ty VPR:$Rm), + (Ty VPR:$Rn), 1, 0)), + (Ty VPR:$Rn), 3, 1)), + (Ty VPR:$Rm), 1, 2)), + (INST VPR:$Rn, VPR:$Rm)>; + +def : NI_Uzp2_v4; +def : NI_Uzp2_v4; +def : NI_Uzp2_v4; + +// zip1 +def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 + (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 + (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 + (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 + (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 + (v16i8 VPR128:$Rn), + (v16i8 VPR128:$Rm), 0, 1)), + (v16i8 VPR128:$Rn), 1, 2)), + (v16i8 VPR128:$Rm), 1, 3)), + (v16i8 VPR128:$Rn), 2, 4)), + (v16i8 VPR128:$Rm), 2, 5)), + (v16i8 VPR128:$Rn), 3, 6)), + (v16i8 VPR128:$Rm), 3, 7)), + (v16i8 VPR128:$Rn), 4, 8)), + (v16i8 VPR128:$Rm), 4, 9)), + (v16i8 VPR128:$Rn), 5, 10)), + (v16i8 VPR128:$Rm), 5, 11)), + (v16i8 VPR128:$Rn), 6, 12)), + (v16i8 VPR128:$Rm), 6, 13)), + (v16i8 VPR128:$Rn), 7, 14)), + (v16i8 VPR128:$Rm), 7, 15)), + (ZIP1vvv_16b VPR128:$Rn, VPR128:$Rm)>; + +class NI_Zip1_v8 + : Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 
+ (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 + (Ty VPR:$Rn), + (Ty VPR:$Rm), 0, 1)), + (Ty VPR:$Rn), 1, 2)), + (Ty VPR:$Rm), 1, 3)), + (Ty VPR:$Rn), 2, 4)), + (Ty VPR:$Rm), 2, 5)), + (Ty VPR:$Rn), 3, 6)), + (Ty VPR:$Rm), 3, 7)), + (INST VPR:$Rn, VPR:$Rm)>; + +def : NI_Zip1_v8; +def : NI_Zip1_v8; + +class NI_Zip1_v4 + : Pat<(Ty (ei (Ty (ei (Ty (ei + (Ty VPR:$Rn), + (Ty VPR:$Rm), 0, 1)), + (Ty VPR:$Rn), 1, 2)), + (Ty VPR:$Rm), 1, 3)), + (INST VPR:$Rn, VPR:$Rm)>; + +def : NI_Zip1_v4; +def : NI_Zip1_v4; +def : NI_Zip1_v4; + +// zip2 +def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 + (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 + (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 + (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 + (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 + (v16i8 VPR128:$Rm), + (v16i8 VPR128:$Rn), 8, 0)), + (v16i8 VPR128:$Rm), 8, 1)), + (v16i8 VPR128:$Rn), 9, 2)), + (v16i8 VPR128:$Rm), 9, 3)), + (v16i8 VPR128:$Rn), 10, 4)), + (v16i8 VPR128:$Rm), 10, 5)), + (v16i8 VPR128:$Rn), 11, 6)), + (v16i8 VPR128:$Rm), 11, 7)), + (v16i8 VPR128:$Rn), 12, 8)), + (v16i8 VPR128:$Rm), 12, 9)), + (v16i8 VPR128:$Rn), 13, 10)), + (v16i8 VPR128:$Rm), 13, 11)), + (v16i8 VPR128:$Rn), 14, 12)), + (v16i8 VPR128:$Rm), 14, 13)), + (v16i8 VPR128:$Rn), 15, 14)), + (ZIP2vvv_16b VPR128:$Rn, VPR128:$Rm)>; + +class NI_Zip2_v8 + : Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 + (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 + (Ty VPR:$Rm), + (Ty VPR:$Rn), 4, 0)), + (Ty VPR:$Rm), 4, 1)), + (Ty VPR:$Rn), 5, 2)), + (Ty VPR:$Rm), 5, 3)), + (Ty VPR:$Rn), 6, 4)), + (Ty VPR:$Rm), 6, 5)), + (Ty VPR:$Rn), 7, 6)), + (INST VPR:$Rn, VPR:$Rm)>; + +def : NI_Zip2_v8; +def : NI_Zip2_v8; + +class NI_Zip2_v4 + : Pat<(Ty (ei (Ty (ei (Ty (ei + (Ty VPR:$Rm), + (Ty VPR:$Rn), 2, 0)), + (Ty VPR:$Rm), 2, 1)), + (Ty VPR:$Rn), 3, 2)), + (INST VPR:$Rn, VPR:$Rm)>; + +def : NI_Zip2_v4; +def : NI_Zip2_v4; +def : NI_Zip2_v4; + +// trn1 +def : Pat<(v16i8 
(NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 + (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 + (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 + (v16i8 VPR128:$Rn), + (v16i8 VPR128:$Rm), 0, 1)), + (v16i8 VPR128:$Rm), 2, 3)), + (v16i8 VPR128:$Rm), 4, 5)), + (v16i8 VPR128:$Rm), 6, 7)), + (v16i8 VPR128:$Rm), 8, 9)), + (v16i8 VPR128:$Rm), 10, 11)), + (v16i8 VPR128:$Rm), 12, 13)), + (v16i8 VPR128:$Rm), 14, 15)), + (TRN1vvv_16b VPR128:$Rn, VPR128:$Rm)>; + +class NI_Trn1_v8 + : Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 + (Ty VPR:$Rn), + (Ty VPR:$Rm), 0, 1)), + (Ty VPR:$Rm), 2, 3)), + (Ty VPR:$Rm), 4, 5)), + (Ty VPR:$Rm), 6, 7)), + (INST VPR:$Rn, VPR:$Rm)>; + +def : NI_Trn1_v8; +def : NI_Trn1_v8; + +class NI_Trn1_v4 + : Pat<(Ty (ei (Ty (ei + (Ty VPR:$Rn), + (Ty VPR:$Rm), 0, 1)), + (Ty VPR:$Rm), 2, 3)), + (INST VPR:$Rn, VPR:$Rm)>; + +def : NI_Trn1_v4; +def : NI_Trn1_v4; +def : NI_Trn1_v4; + +// trn2 +def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 + (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 + (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 + (v16i8 VPR128:$Rm), + (v16i8 VPR128:$Rn), 1, 0)), + (v16i8 VPR128:$Rn), 3, 2)), + (v16i8 VPR128:$Rn), 5, 4)), + (v16i8 VPR128:$Rn), 7, 6)), + (v16i8 VPR128:$Rn), 9, 8)), + (v16i8 VPR128:$Rn), 11, 10)), + (v16i8 VPR128:$Rn), 13, 12)), + (v16i8 VPR128:$Rn), 15, 14)), + (TRN2vvv_16b VPR128:$Rn, VPR128:$Rm)>; + +class NI_Trn2_v8 + : Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 + (Ty VPR:$Rm), + (Ty VPR:$Rn), 1, 0)), + (Ty VPR:$Rn), 3, 2)), + (Ty VPR:$Rn), 5, 4)), + (Ty VPR:$Rn), 7, 6)), + (INST VPR:$Rn, VPR:$Rm)>; + +def : NI_Trn2_v8; +def : NI_Trn2_v8; + +class NI_Trn2_v4 + : Pat<(Ty (ei (Ty (ei + (Ty VPR:$Rm), + (Ty VPR:$Rn), 1, 0)), + (Ty VPR:$Rn), 3, 2)), + (INST VPR:$Rn, VPR:$Rm)>; + +def : NI_Trn2_v4; +def : NI_Trn2_v4; +def : NI_Trn2_v4; + +// End of implementation for instruction class (Perm) + // The followings are for instruction class (3V Diff) // normal long/long2 pattern 
Index: test/CodeGen/AArch64/neon-perm.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/neon-perm.ll @@ -0,0 +1,1676 @@ +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +%struct.int8x8x2_t = type { [2 x <8 x i8>] } +%struct.int16x4x2_t = type { [2 x <4 x i16>] } +%struct.int32x2x2_t = type { [2 x <2 x i32>] } +%struct.uint8x8x2_t = type { [2 x <8 x i8>] } +%struct.uint16x4x2_t = type { [2 x <4 x i16>] } +%struct.uint32x2x2_t = type { [2 x <2 x i32>] } +%struct.float32x2x2_t = type { [2 x <2 x float>] } +%struct.poly8x8x2_t = type { [2 x <8 x i8>] } +%struct.poly16x4x2_t = type { [2 x <4 x i16>] } +%struct.int8x16x2_t = type { [2 x <16 x i8>] } +%struct.int16x8x2_t = type { [2 x <8 x i16>] } +%struct.int32x4x2_t = type { [2 x <4 x i32>] } +%struct.uint8x16x2_t = type { [2 x <16 x i8>] } +%struct.uint16x8x2_t = type { [2 x <8 x i16>] } +%struct.uint32x4x2_t = type { [2 x <4 x i32>] } +%struct.float32x4x2_t = type { [2 x <4 x float>] } +%struct.poly8x16x2_t = type { [2 x <16 x i8>] } +%struct.poly16x8x2_t = type { [2 x <8 x i16>] } + +define <8 x i8> @test_vuzp1_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vuzp1_s8: +; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vuzp1q_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vuzp1q_s8: +; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_vuzp1_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vuzp1_s16: +; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_vuzp1q_s16(<8 x i16> %a, <8 x i16> %b) { +; 
CHECK: test_vuzp1q_s16: +; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <2 x i32> @test_vuzp1_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vuzp1_s32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +entry: + %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + ret <2 x i32> %shuffle.i +} + +define <4 x i32> @test_vuzp1q_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vuzp1q_s32: +; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %shuffle.i +} + +define <2 x i64> @test_vuzp1q_s64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vuzp1q_s64: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> + ret <2 x i64> %shuffle.i +} + +define <8 x i8> @test_vuzp1_u8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vuzp1_u8: +; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vuzp1q_u8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vuzp1q_u8: +; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_vuzp1_u16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vuzp1_u16: +; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_vuzp1q_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vuzp1q_u16: +; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <2 x i32> 
@test_vuzp1_u32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vuzp1_u32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +entry: + %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + ret <2 x i32> %shuffle.i +} + +define <4 x i32> @test_vuzp1q_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vuzp1q_u32: +; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %shuffle.i +} + +define <2 x i64> @test_vuzp1q_u64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vuzp1q_u64: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> + ret <2 x i64> %shuffle.i +} + +define <2 x float> @test_vuzp1_f32(<2 x float> %a, <2 x float> %b) { +; CHECK: test_vuzp1_f32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +entry: + %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> + ret <2 x float> %shuffle.i +} + +define <4 x float> @test_vuzp1q_f32(<4 x float> %a, <4 x float> %b) { +; CHECK: test_vuzp1q_f32: +; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %shuffle.i +} + +define <2 x double> @test_vuzp1q_f64(<2 x double> %a, <2 x double> %b) { +; CHECK: test_vuzp1q_f64: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> + ret <2 x double> %shuffle.i +} + +define <8 x i8> @test_vuzp1_p8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vuzp1_p8: +; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vuzp1q_p8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vuzp1q_p8: +; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x 
i8> %b, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_vuzp1_p16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vuzp1_p16: +; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_vuzp1q_p16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vuzp1q_p16: +; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <8 x i8> @test_vuzp2_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vuzp2_s8: +; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vuzp2q_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vuzp2q_s8: +; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_vuzp2_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vuzp2_s16: +; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_vuzp2q_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vuzp2q_s16: +; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <2 x i32> @test_vuzp2_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vuzp2_s32: +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + ret <2 x i32> %shuffle.i +} + +define <4 x i32> @test_vuzp2q_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vuzp2q_s32: +; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, 
{{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %shuffle.i +} + +define <2 x i64> @test_vuzp2q_s64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vuzp2q_s64: +; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +entry: + %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> + ret <2 x i64> %shuffle.i +} + +define <8 x i8> @test_vuzp2_u8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vuzp2_u8: +; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vuzp2q_u8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vuzp2q_u8: +; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_vuzp2_u16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vuzp2_u16: +; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_vuzp2q_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vuzp2q_u16: +; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <2 x i32> @test_vuzp2_u32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vuzp2_u32: +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + ret <2 x i32> %shuffle.i +} + +define <4 x i32> @test_vuzp2q_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vuzp2q_u32: +; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %shuffle.i +} + +define <2 x i64> @test_vuzp2q_u64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: 
test_vuzp2q_u64: +; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +entry: + %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> + ret <2 x i64> %shuffle.i +} + +define <2 x float> @test_vuzp2_f32(<2 x float> %a, <2 x float> %b) { +; CHECK: test_vuzp2_f32: +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> + ret <2 x float> %shuffle.i +} + +define <4 x float> @test_vuzp2q_f32(<4 x float> %a, <4 x float> %b) { +; CHECK: test_vuzp2q_f32: +; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %shuffle.i +} + +define <2 x double> @test_vuzp2q_f64(<2 x double> %a, <2 x double> %b) { +; CHECK: test_vuzp2q_f64: +; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +entry: + %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> + ret <2 x double> %shuffle.i +} + +define <8 x i8> @test_vuzp2_p8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vuzp2_p8: +; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vuzp2q_p8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vuzp2q_p8: +; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_vuzp2_p16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vuzp2_p16: +; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_vuzp2q_p16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vuzp2q_p16: +; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %shuffle.i 
+} + +define <8 x i8> @test_vzip1_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vzip1_s8: +; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vzip1q_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vzip1q_s8: +; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_vzip1_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vzip1_s16: +; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_vzip1q_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vzip1q_s16: +; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <2 x i32> @test_vzip1_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vzip1_s32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +entry: + %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + ret <2 x i32> %shuffle.i +} + +define <4 x i32> @test_vzip1q_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vzip1q_s32: +; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %shuffle.i +} + +define <2 x i64> @test_vzip1q_s64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vzip1q_s64: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> + ret <2 x i64> %shuffle.i +} + +define <8 x i8> @test_vzip1_u8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vzip1_u8: +; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> 
%b, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vzip1q_u8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vzip1q_u8: +; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_vzip1_u16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vzip1_u16: +; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_vzip1q_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vzip1q_u16: +; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <2 x i32> @test_vzip1_u32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vzip1_u32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +entry: + %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + ret <2 x i32> %shuffle.i +} + +define <4 x i32> @test_vzip1q_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vzip1q_u32: +; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %shuffle.i +} + +define <2 x i64> @test_vzip1q_u64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vzip1q_u64: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> + ret <2 x i64> %shuffle.i +} + +define <2 x float> @test_vzip1_f32(<2 x float> %a, <2 x float> %b) { +; CHECK: test_vzip1_f32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +entry: + %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> + ret <2 x float> %shuffle.i +} + +define <4 x float> @test_vzip1q_f32(<4 x float> %a, <4 x float> %b) { +; CHECK: test_vzip1q_f32: +; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, 
{{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %shuffle.i +} + +define <2 x double> @test_vzip1q_f64(<2 x double> %a, <2 x double> %b) { +; CHECK: test_vzip1q_f64: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> + ret <2 x double> %shuffle.i +} + +define <8 x i8> @test_vzip1_p8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vzip1_p8: +; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vzip1q_p8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vzip1q_p8: +; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_vzip1_p16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vzip1_p16: +; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_vzip1q_p16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vzip1q_p16: +; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <8 x i8> @test_vzip2_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vzip2_s8: +; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vzip2q_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vzip2q_s8: +; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_vzip2_s16(<4 x i16> %a, <4 x i16> 
%b) { +; CHECK: test_vzip2_s16: +; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_vzip2q_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vzip2q_s16: +; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <2 x i32> @test_vzip2_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vzip2_s32: +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + ret <2 x i32> %shuffle.i +} + +define <4 x i32> @test_vzip2q_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vzip2q_s32: +; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %shuffle.i +} + +define <2 x i64> @test_vzip2q_s64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vzip2q_s64: +; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +entry: + %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> + ret <2 x i64> %shuffle.i +} + +define <8 x i8> @test_vzip2_u8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vzip2_u8: +; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vzip2q_u8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vzip2q_u8: +; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_vzip2_u16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vzip2_u16: +; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define 
<8 x i16> @test_vzip2q_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vzip2q_u16: +; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <2 x i32> @test_vzip2_u32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vzip2_u32: +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + ret <2 x i32> %shuffle.i +} + +define <4 x i32> @test_vzip2q_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vzip2q_u32: +; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %shuffle.i +} + +define <2 x i64> @test_vzip2q_u64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vzip2q_u64: +; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +entry: + %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> + ret <2 x i64> %shuffle.i +} + +define <2 x float> @test_vzip2_f32(<2 x float> %a, <2 x float> %b) { +; CHECK: test_vzip2_f32: +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> + ret <2 x float> %shuffle.i +} + +define <4 x float> @test_vzip2q_f32(<4 x float> %a, <4 x float> %b) { +; CHECK: test_vzip2q_f32: +; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %shuffle.i +} + +define <2 x double> @test_vzip2q_f64(<2 x double> %a, <2 x double> %b) { +; CHECK: test_vzip2q_f64: +; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +entry: + %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> + ret <2 x double> %shuffle.i +} + +define <8 x i8> @test_vzip2_p8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vzip2_p8: +; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> 
%a, <8 x i8> %b, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vzip2q_p8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vzip2q_p8: +; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_vzip2_p16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vzip2_p16: +; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_vzip2q_p16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vzip2q_p16: +; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <8 x i8> @test_vtrn1_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vtrn1_s8: +; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vtrn1q_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vtrn1q_s8: +; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_vtrn1_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vtrn1_s16: +; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_vtrn1q_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vtrn1q_s16: +; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <2 x i32> @test_vtrn1_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vtrn1_s32: +; CHECK: ins {{v[0-9]+}}.s[1], 
{{v[0-9]+}}.s[0] +entry: + %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + ret <2 x i32> %shuffle.i +} + +define <4 x i32> @test_vtrn1q_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vtrn1q_s32: +; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %shuffle.i +} + +define <2 x i64> @test_vtrn1q_s64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vtrn1q_s64: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> + ret <2 x i64> %shuffle.i +} + +define <8 x i8> @test_vtrn1_u8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vtrn1_u8: +; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vtrn1q_u8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vtrn1q_u8: +; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_vtrn1_u16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vtrn1_u16: +; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_vtrn1q_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vtrn1q_u16: +; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <2 x i32> @test_vtrn1_u32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vtrn1_u32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +entry: + %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + ret <2 x i32> %shuffle.i +} + +define <4 x i32> @test_vtrn1q_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: 
test_vtrn1q_u32: +; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %shuffle.i +} + +define <2 x i64> @test_vtrn1q_u64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vtrn1q_u64: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> + ret <2 x i64> %shuffle.i +} + +define <2 x float> @test_vtrn1_f32(<2 x float> %a, <2 x float> %b) { +; CHECK: test_vtrn1_f32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +entry: + %shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> + ret <2 x float> %shuffle.i +} + +define <4 x float> @test_vtrn1q_f32(<4 x float> %a, <4 x float> %b) { +; CHECK: test_vtrn1q_f32: +; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %shuffle.i +} + +define <2 x double> @test_vtrn1q_f64(<2 x double> %a, <2 x double> %b) { +; CHECK: test_vtrn1q_f64: +; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +entry: + %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> + ret <2 x double> %shuffle.i +} + +define <8 x i8> @test_vtrn1_p8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vtrn1_p8: +; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vtrn1q_p8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vtrn1q_p8: +; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_vtrn1_p16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vtrn1_p16: +; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + ret <4 x i16> %shuffle.i 
+} + +define <8 x i16> @test_vtrn1q_p16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vtrn1q_p16: +; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <8 x i8> @test_vtrn2_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vtrn2_s8: +; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vtrn2q_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vtrn2q_s8: +; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_vtrn2_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vtrn2_s16: +; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_vtrn2q_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vtrn2q_s16: +; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <2 x i32> @test_vtrn2_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vtrn2_s32: +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + ret <2 x i32> %shuffle.i +} + +define <4 x i32> @test_vtrn2q_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vtrn2q_s32: +; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %shuffle.i +} + +define <2 x i64> @test_vtrn2q_s64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vtrn2q_s64: +; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +entry: + %shuffle.i = shufflevector <2 x i64> %a, 
<2 x i64> %b, <2 x i32> + ret <2 x i64> %shuffle.i +} + +define <8 x i8> @test_vtrn2_u8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vtrn2_u8: +; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vtrn2q_u8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vtrn2q_u8: +; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_vtrn2_u16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vtrn2_u16: +; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_vtrn2q_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vtrn2q_u16: +; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define <2 x i32> @test_vtrn2_u32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vtrn2_u32: +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + ret <2 x i32> %shuffle.i +} + +define <4 x i32> @test_vtrn2q_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vtrn2q_u32: +; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %shuffle.i +} + +define <2 x i64> @test_vtrn2q_u64(<2 x i64> %a, <2 x i64> %b) { +; CHECK: test_vtrn2q_u64: +; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +entry: + %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> + ret <2 x i64> %shuffle.i +} + +define <2 x float> @test_vtrn2_f32(<2 x float> %a, <2 x float> %b) { +; CHECK: test_vtrn2_f32: +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + 
%shuffle.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> + ret <2 x float> %shuffle.i +} + +define <4 x float> @test_vtrn2q_f32(<4 x float> %a, <4 x float> %b) { +; CHECK: test_vtrn2q_f32: +; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %shuffle.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %shuffle.i +} + +define <2 x double> @test_vtrn2q_f64(<2 x double> %a, <2 x double> %b) { +; CHECK: test_vtrn2q_f64: +; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] +entry: + %shuffle.i = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> + ret <2 x double> %shuffle.i +} + +define <8 x i8> @test_vtrn2_p8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vtrn2_p8: +; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + ret <8 x i8> %shuffle.i +} + +define <16 x i8> @test_vtrn2q_p8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vtrn2q_p8: +; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %shuffle.i +} + +define <4 x i16> @test_vtrn2_p16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vtrn2_p16: +; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + ret <4 x i16> %shuffle.i +} + +define <8 x i16> @test_vtrn2q_p16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vtrn2q_p16: +; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %shuffle.i +} + +define %struct.int8x8x2_t @test_vuzp_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vuzp_s8: +; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vuzp.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + %vuzp1.i = shufflevector <8 x i8> 
%a, <8 x i8> %b, <8 x i32> + %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %vuzp1.i, 0, 1 + ret %struct.int8x8x2_t %.fca.0.1.insert +} + +define %struct.int16x4x2_t @test_vuzp_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vuzp_s16: +; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vuzp.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + %vuzp1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %vuzp1.i, 0, 1 + ret %struct.int16x4x2_t %.fca.0.1.insert +} + +define %struct.int32x2x2_t @test_vuzp_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vuzp_s32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %vuzp.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + %vuzp1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %vuzp1.i, 0, 1 + ret %struct.int32x2x2_t %.fca.0.1.insert +} + +define %struct.uint8x8x2_t @test_vuzp_u8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vuzp_u8: +; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vuzp.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + %vuzp1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + %.fca.0.0.insert = insertvalue %struct.uint8x8x2_t undef, <8 x i8> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint8x8x2_t %.fca.0.0.insert, <8 x i8> %vuzp1.i, 0, 1 + ret %struct.uint8x8x2_t %.fca.0.1.insert +} + +define 
%struct.uint16x4x2_t @test_vuzp_u16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vuzp_u16: +; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vuzp.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + %vuzp1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + %.fca.0.0.insert = insertvalue %struct.uint16x4x2_t undef, <4 x i16> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint16x4x2_t %.fca.0.0.insert, <4 x i16> %vuzp1.i, 0, 1 + ret %struct.uint16x4x2_t %.fca.0.1.insert +} + +define %struct.uint32x2x2_t @test_vuzp_u32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vuzp_u32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %vuzp.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + %vuzp1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + %.fca.0.0.insert = insertvalue %struct.uint32x2x2_t undef, <2 x i32> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint32x2x2_t %.fca.0.0.insert, <2 x i32> %vuzp1.i, 0, 1 + ret %struct.uint32x2x2_t %.fca.0.1.insert +} + +define %struct.float32x2x2_t @test_vuzp_f32(<2 x float> %a, <2 x float> %b) { +; CHECK: test_vuzp_f32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %vuzp.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> + %vuzp1.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> + %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %vuzp1.i, 0, 1 + ret %struct.float32x2x2_t %.fca.0.1.insert +} + +define %struct.poly8x8x2_t @test_vuzp_p8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vuzp_p8: +; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vuzp.i = shufflevector <8 x i8> %a, 
<8 x i8> %b, <8 x i32> + %vuzp1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + %.fca.0.0.insert = insertvalue %struct.poly8x8x2_t undef, <8 x i8> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly8x8x2_t %.fca.0.0.insert, <8 x i8> %vuzp1.i, 0, 1 + ret %struct.poly8x8x2_t %.fca.0.1.insert +} + +define %struct.poly16x4x2_t @test_vuzp_p16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vuzp_p16: +; CHECK: uzp1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +; CHECK: uzp2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vuzp.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + %vuzp1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + %.fca.0.0.insert = insertvalue %struct.poly16x4x2_t undef, <4 x i16> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly16x4x2_t %.fca.0.0.insert, <4 x i16> %vuzp1.i, 0, 1 + ret %struct.poly16x4x2_t %.fca.0.1.insert +} + +define %struct.int8x16x2_t @test_vuzpq_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vuzpq_s8: +; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vuzp.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + %vuzp1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %vuzp1.i, 0, 1 + ret %struct.int8x16x2_t %.fca.0.1.insert +} + +define %struct.int16x8x2_t @test_vuzpq_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vuzpq_s16: +; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vuzp.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + %vuzp1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int16x8x2_t 
%.fca.0.0.insert, <8 x i16> %vuzp1.i, 0, 1 + ret %struct.int16x8x2_t %.fca.0.1.insert +} + +define %struct.int32x4x2_t @test_vuzpq_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vuzpq_s32: +; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vuzp.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + %vuzp1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %vuzp1.i, 0, 1 + ret %struct.int32x4x2_t %.fca.0.1.insert +} + +define %struct.uint8x16x2_t @test_vuzpq_u8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vuzpq_u8: +; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vuzp.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + %vuzp1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + %.fca.0.0.insert = insertvalue %struct.uint8x16x2_t undef, <16 x i8> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint8x16x2_t %.fca.0.0.insert, <16 x i8> %vuzp1.i, 0, 1 + ret %struct.uint8x16x2_t %.fca.0.1.insert +} + +define %struct.uint16x8x2_t @test_vuzpq_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vuzpq_u16: +; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vuzp.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + %vuzp1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + %.fca.0.0.insert = insertvalue %struct.uint16x8x2_t undef, <8 x i16> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint16x8x2_t %.fca.0.0.insert, <8 x i16> %vuzp1.i, 0, 1 + ret %struct.uint16x8x2_t %.fca.0.1.insert +} + +define %struct.uint32x4x2_t @test_vuzpq_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vuzpq_u32: +; CHECK: uzp1 
{{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vuzp.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + %vuzp1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + %.fca.0.0.insert = insertvalue %struct.uint32x4x2_t undef, <4 x i32> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint32x4x2_t %.fca.0.0.insert, <4 x i32> %vuzp1.i, 0, 1 + ret %struct.uint32x4x2_t %.fca.0.1.insert +} + +define %struct.float32x4x2_t @test_vuzpq_f32(<4 x float> %a, <4 x float> %b) { +; CHECK: test_vuzpq_f32: +; CHECK: uzp1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK: uzp2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vuzp.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + %vuzp1.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %vuzp1.i, 0, 1 + ret %struct.float32x4x2_t %.fca.0.1.insert +} + +define %struct.poly8x16x2_t @test_vuzpq_p8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vuzpq_p8: +; CHECK: uzp1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: uzp2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vuzp.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + %vuzp1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + %.fca.0.0.insert = insertvalue %struct.poly8x16x2_t undef, <16 x i8> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly8x16x2_t %.fca.0.0.insert, <16 x i8> %vuzp1.i, 0, 1 + ret %struct.poly8x16x2_t %.fca.0.1.insert +} + +define %struct.poly16x8x2_t @test_vuzpq_p16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vuzpq_p16: +; CHECK: uzp1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: uzp2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vuzp.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + %vuzp1.i = 
shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + %.fca.0.0.insert = insertvalue %struct.poly16x8x2_t undef, <8 x i16> %vuzp.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly16x8x2_t %.fca.0.0.insert, <8 x i16> %vuzp1.i, 0, 1 + ret %struct.poly16x8x2_t %.fca.0.1.insert +} + +define %struct.int8x8x2_t @test_vzip_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vzip_s8: +; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vzip.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + %vzip1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %vzip1.i, 0, 1 + ret %struct.int8x8x2_t %.fca.0.1.insert +} + +define %struct.int16x4x2_t @test_vzip_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vzip_s16: +; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vzip.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + %vzip1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %vzip1.i, 0, 1 + ret %struct.int16x4x2_t %.fca.0.1.insert +} + +define %struct.int32x2x2_t @test_vzip_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vzip_s32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %vzip.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + %vzip1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %vzip1.i, 0, 1 + ret %struct.int32x2x2_t %.fca.0.1.insert 
+} + +define %struct.uint8x8x2_t @test_vzip_u8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vzip_u8: +; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vzip.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + %vzip1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + %.fca.0.0.insert = insertvalue %struct.uint8x8x2_t undef, <8 x i8> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint8x8x2_t %.fca.0.0.insert, <8 x i8> %vzip1.i, 0, 1 + ret %struct.uint8x8x2_t %.fca.0.1.insert +} + +define %struct.uint16x4x2_t @test_vzip_u16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vzip_u16: +; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vzip.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + %vzip1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + %.fca.0.0.insert = insertvalue %struct.uint16x4x2_t undef, <4 x i16> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint16x4x2_t %.fca.0.0.insert, <4 x i16> %vzip1.i, 0, 1 + ret %struct.uint16x4x2_t %.fca.0.1.insert +} + +define %struct.uint32x2x2_t @test_vzip_u32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vzip_u32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %vzip.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + %vzip1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> + %.fca.0.0.insert = insertvalue %struct.uint32x2x2_t undef, <2 x i32> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint32x2x2_t %.fca.0.0.insert, <2 x i32> %vzip1.i, 0, 1 + ret %struct.uint32x2x2_t %.fca.0.1.insert +} + +define %struct.float32x2x2_t @test_vzip_f32(<2 x float> %a, <2 x float> %b) { +; CHECK: test_vzip_f32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %vzip.i = shufflevector <2 x float> %a, <2 x 
float> %b, <2 x i32> + %vzip1.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> + %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %vzip1.i, 0, 1 + ret %struct.float32x2x2_t %.fca.0.1.insert +} + +define %struct.poly8x8x2_t @test_vzip_p8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vzip_p8: +; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vzip.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + %vzip1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + %.fca.0.0.insert = insertvalue %struct.poly8x8x2_t undef, <8 x i8> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly8x8x2_t %.fca.0.0.insert, <8 x i8> %vzip1.i, 0, 1 + ret %struct.poly8x8x2_t %.fca.0.1.insert +} + +define %struct.poly16x4x2_t @test_vzip_p16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vzip_p16: +; CHECK: zip1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +; CHECK: zip2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vzip.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + %vzip1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> + %.fca.0.0.insert = insertvalue %struct.poly16x4x2_t undef, <4 x i16> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly16x4x2_t %.fca.0.0.insert, <4 x i16> %vzip1.i, 0, 1 + ret %struct.poly16x4x2_t %.fca.0.1.insert +} + +define %struct.int8x16x2_t @test_vzipq_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vzipq_s8: +; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vzip.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + %vzip1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x16x2_t 
%.fca.0.0.insert, <16 x i8> %vzip1.i, 0, 1 + ret %struct.int8x16x2_t %.fca.0.1.insert +} + +define %struct.int16x8x2_t @test_vzipq_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vzipq_s16: +; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vzip.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + %vzip1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %vzip1.i, 0, 1 + ret %struct.int16x8x2_t %.fca.0.1.insert +} + +define %struct.int32x4x2_t @test_vzipq_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vzipq_s32: +; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vzip.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + %vzip1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %vzip1.i, 0, 1 + ret %struct.int32x4x2_t %.fca.0.1.insert +} + +define %struct.uint8x16x2_t @test_vzipq_u8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vzipq_u8: +; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vzip.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + %vzip1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + %.fca.0.0.insert = insertvalue %struct.uint8x16x2_t undef, <16 x i8> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint8x16x2_t %.fca.0.0.insert, <16 x i8> %vzip1.i, 0, 1 + ret %struct.uint8x16x2_t %.fca.0.1.insert +} + +define %struct.uint16x8x2_t @test_vzipq_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vzipq_u16: +; CHECK: zip1 {{v[0-9]+}}.8h, 
{{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vzip.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> + %vzip1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> + %.fca.0.0.insert = insertvalue %struct.uint16x8x2_t undef, <8 x i16> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint16x8x2_t %.fca.0.0.insert, <8 x i16> %vzip1.i, 0, 1 + ret %struct.uint16x8x2_t %.fca.0.1.insert +} + +define %struct.uint32x4x2_t @test_vzipq_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vzipq_u32: +; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vzip.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5> + %vzip1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7> + %.fca.0.0.insert = insertvalue %struct.uint32x4x2_t undef, <4 x i32> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint32x4x2_t %.fca.0.0.insert, <4 x i32> %vzip1.i, 0, 1 + ret %struct.uint32x4x2_t %.fca.0.1.insert +} + +define %struct.float32x4x2_t @test_vzipq_f32(<4 x float> %a, <4 x float> %b) { +; CHECK: test_vzipq_f32: +; CHECK: zip1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK: zip2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vzip.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5> + %vzip1.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7> + %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %vzip1.i, 0, 1 + ret %struct.float32x4x2_t %.fca.0.1.insert +} + +define %struct.poly8x16x2_t @test_vzipq_p8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vzipq_p8: +; CHECK: zip1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: zip2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vzip.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> + %vzip1.i = shufflevector <16 
x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> + %.fca.0.0.insert = insertvalue %struct.poly8x16x2_t undef, <16 x i8> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly8x16x2_t %.fca.0.0.insert, <16 x i8> %vzip1.i, 0, 1 + ret %struct.poly8x16x2_t %.fca.0.1.insert +} + +define %struct.poly16x8x2_t @test_vzipq_p16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vzipq_p16: +; CHECK: zip1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: zip2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vzip.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> + %vzip1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> + %.fca.0.0.insert = insertvalue %struct.poly16x8x2_t undef, <8 x i16> %vzip.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly16x8x2_t %.fca.0.0.insert, <8 x i16> %vzip1.i, 0, 1 + ret %struct.poly16x8x2_t %.fca.0.1.insert +} + +define %struct.int8x8x2_t @test_vtrn_s8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vtrn_s8: +; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vtrn.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> + %vtrn1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> + %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %vtrn1.i, 0, 1 + ret %struct.int8x8x2_t %.fca.0.1.insert +} + +define %struct.int16x4x2_t @test_vtrn_s16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vtrn_s16: +; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vtrn.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> + %vtrn1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7> + %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %vtrn1.i, 0, 1 + ret %struct.int16x4x2_t 
%.fca.0.1.insert +} + +define %struct.int32x2x2_t @test_vtrn_s32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vtrn_s32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %vtrn.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2> + %vtrn1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3> + %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %vtrn1.i, 0, 1 + ret %struct.int32x2x2_t %.fca.0.1.insert +} + +define %struct.uint8x8x2_t @test_vtrn_u8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vtrn_u8: +; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vtrn.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> + %vtrn1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> + %.fca.0.0.insert = insertvalue %struct.uint8x8x2_t undef, <8 x i8> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint8x8x2_t %.fca.0.0.insert, <8 x i8> %vtrn1.i, 0, 1 + ret %struct.uint8x8x2_t %.fca.0.1.insert +} + +define %struct.uint16x4x2_t @test_vtrn_u16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vtrn_u16: +; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vtrn.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> + %vtrn1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7> + %.fca.0.0.insert = insertvalue %struct.uint16x4x2_t undef, <4 x i16> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint16x4x2_t %.fca.0.0.insert, <4 x i16> %vtrn1.i, 0, 1 + ret %struct.uint16x4x2_t %.fca.0.1.insert +} + +define %struct.uint32x2x2_t @test_vtrn_u32(<2 x i32> %a, <2 x i32> %b) { +; CHECK: test_vtrn_u32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %vtrn.i = shufflevector <2 x i32> %a, <2 
x i32> %b, <2 x i32> <i32 0, i32 2> + %vtrn1.i = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3> + %.fca.0.0.insert = insertvalue %struct.uint32x2x2_t undef, <2 x i32> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint32x2x2_t %.fca.0.0.insert, <2 x i32> %vtrn1.i, 0, 1 + ret %struct.uint32x2x2_t %.fca.0.1.insert +} + +define %struct.float32x2x2_t @test_vtrn_f32(<2 x float> %a, <2 x float> %b) { +; CHECK: test_vtrn_f32: +; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] +; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] +entry: + %vtrn.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2> + %vtrn1.i = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3> + %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %vtrn1.i, 0, 1 + ret %struct.float32x2x2_t %.fca.0.1.insert +} + +define %struct.poly8x8x2_t @test_vtrn_p8(<8 x i8> %a, <8 x i8> %b) { +; CHECK: test_vtrn_p8: +; CHECK: trn1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: trn2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +entry: + %vtrn.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> + %vtrn1.i = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> + %.fca.0.0.insert = insertvalue %struct.poly8x8x2_t undef, <8 x i8> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly8x8x2_t %.fca.0.0.insert, <8 x i8> %vtrn1.i, 0, 1 + ret %struct.poly8x8x2_t %.fca.0.1.insert +} + +define %struct.poly16x4x2_t @test_vtrn_p16(<4 x i16> %a, <4 x i16> %b) { +; CHECK: test_vtrn_p16: +; CHECK: trn1 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +; CHECK: trn2 {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +entry: + %vtrn.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> + %vtrn1.i = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7> + %.fca.0.0.insert = insertvalue %struct.poly16x4x2_t undef, <4 x i16> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly16x4x2_t %.fca.0.0.insert, <4 
x i16> %vtrn1.i, 0, 1 + ret %struct.poly16x4x2_t %.fca.0.1.insert +} + +define %struct.int8x16x2_t @test_vtrnq_s8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vtrnq_s8: +; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vtrn.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30> + %vtrn1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31> + %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %vtrn1.i, 0, 1 + ret %struct.int8x16x2_t %.fca.0.1.insert +} + +define %struct.int16x8x2_t @test_vtrnq_s16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vtrnq_s16: +; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vtrn.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> + %vtrn1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> + %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %vtrn1.i, 0, 1 + ret %struct.int16x8x2_t %.fca.0.1.insert +} + +define %struct.int32x4x2_t @test_vtrnq_s32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vtrnq_s32: +; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vtrn.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> + %vtrn1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7> + %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %vtrn1.i, 0, 1 + ret %struct.int32x4x2_t %.fca.0.1.insert +} + +define %struct.uint8x16x2_t @test_vtrnq_u8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vtrnq_u8: +; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, 
{{v[0-9]+}}.16b +; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vtrn.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30> + %vtrn1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31> + %.fca.0.0.insert = insertvalue %struct.uint8x16x2_t undef, <16 x i8> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint8x16x2_t %.fca.0.0.insert, <16 x i8> %vtrn1.i, 0, 1 + ret %struct.uint8x16x2_t %.fca.0.1.insert +} + +define %struct.uint16x8x2_t @test_vtrnq_u16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vtrnq_u16: +; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vtrn.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> + %vtrn1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> + %.fca.0.0.insert = insertvalue %struct.uint16x8x2_t undef, <8 x i16> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint16x8x2_t %.fca.0.0.insert, <8 x i16> %vtrn1.i, 0, 1 + ret %struct.uint16x8x2_t %.fca.0.1.insert +} + +define %struct.uint32x4x2_t @test_vtrnq_u32(<4 x i32> %a, <4 x i32> %b) { +; CHECK: test_vtrnq_u32: +; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vtrn.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> + %vtrn1.i = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7> + %.fca.0.0.insert = insertvalue %struct.uint32x4x2_t undef, <4 x i32> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.uint32x4x2_t %.fca.0.0.insert, <4 x i32> %vtrn1.i, 0, 1 + ret %struct.uint32x4x2_t %.fca.0.1.insert +} + +define %struct.float32x4x2_t @test_vtrnq_f32(<4 x float> %a, <4 x float> %b) { +; CHECK: test_vtrnq_f32: +; CHECK: trn1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK: trn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +entry: + %vtrn.i = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> + %vtrn1.i = shufflevector <4 x float> %a, <4 x float> 
%b, <4 x i32> <i32 1, i32 5, i32 3, i32 7> + %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %vtrn1.i, 0, 1 + ret %struct.float32x4x2_t %.fca.0.1.insert +} + +define %struct.poly8x16x2_t @test_vtrnq_p8(<16 x i8> %a, <16 x i8> %b) { +; CHECK: test_vtrnq_p8: +; CHECK: trn1 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: trn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +entry: + %vtrn.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30> + %vtrn1.i = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31> + %.fca.0.0.insert = insertvalue %struct.poly8x16x2_t undef, <16 x i8> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly8x16x2_t %.fca.0.0.insert, <16 x i8> %vtrn1.i, 0, 1 + ret %struct.poly8x16x2_t %.fca.0.1.insert +} + +define %struct.poly16x8x2_t @test_vtrnq_p16(<8 x i16> %a, <8 x i16> %b) { +; CHECK: test_vtrnq_p16: +; CHECK: trn1 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: trn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +entry: + %vtrn.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> + %vtrn1.i = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> + %.fca.0.0.insert = insertvalue %struct.poly16x8x2_t undef, <8 x i16> %vtrn.i, 0, 0 + %.fca.0.1.insert = insertvalue %struct.poly16x8x2_t %.fca.0.0.insert, <8 x i16> %vtrn1.i, 0, 1 + ret %struct.poly16x8x2_t %.fca.0.1.insert +} Index: test/MC/AArch64/neon-diagnostics.s =================================================================== --- test/MC/AArch64/neon-diagnostics.s +++ test/MC/AArch64/neon-diagnostics.s @@ -4697,3 +4697,211 @@ // CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: uqxtn s19, s14 // CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Permutation with 3 vectors +//---------------------------------------------------------------------- + + uzp1 v0.16b, v1.8b, v2.8b + uzp1 v0.8b, v1.4b, v2.4b + 
uzp1 v0.8h, v1.4h, v2.4h + uzp1 v0.4h, v1.2h, v2.2h + uzp1 v0.4s, v1.2s, v2.2s + uzp1 v0.2s, v1.1s, v2.1s + uzp1 v0.2d, v1.1d, v2.1d + uzp1 v0.1d, v1.1d, v2.1d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uzp1 v0.16b, v1.8b, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uzp1 v0.8b, v1.4b, v2.4b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uzp1 v0.8h, v1.4h, v2.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uzp1 v0.4h, v1.2h, v2.2h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uzp1 v0.4s, v1.2s, v2.2s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uzp1 v0.2s, v1.1s, v2.1s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uzp1 v0.2d, v1.1d, v2.1d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uzp1 v0.1d, v1.1d, v2.1d +// CHECK-ERROR: ^ + + uzp2 v0.16b, v1.8b, v2.8b + uzp2 v0.8b, v1.4b, v2.4b + uzp2 v0.8h, v1.4h, v2.4h + uzp2 v0.4h, v1.2h, v2.2h + uzp2 v0.4s, v1.2s, v2.2s + uzp2 v0.2s, v1.1s, v2.1s + uzp2 v0.2d, v1.1d, v2.1d + uzp2 v0.1d, v1.1d, v2.1d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uzp2 v0.16b, v1.8b, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uzp2 v0.8b, v1.4b, v2.4b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uzp2 v0.8h, v1.4h, v2.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uzp2 v0.4h, v1.2h, v2.2h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uzp2 v0.4s, v1.2s, v2.2s +// CHECK-ERROR: ^ 
+// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uzp2 v0.2s, v1.1s, v2.1s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uzp2 v0.2d, v1.1d, v2.1d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: uzp2 v0.1d, v1.1d, v2.1d +// CHECK-ERROR: ^ + + zip1 v0.16b, v1.8b, v2.8b + zip1 v0.8b, v1.4b, v2.4b + zip1 v0.8h, v1.4h, v2.4h + zip1 v0.4h, v1.2h, v2.2h + zip1 v0.4s, v1.2s, v2.2s + zip1 v0.2s, v1.1s, v2.1s + zip1 v0.2d, v1.1d, v2.1d + zip1 v0.1d, v1.1d, v2.1d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: zip1 v0.16b, v1.8b, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: zip1 v0.8b, v1.4b, v2.4b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: zip1 v0.8h, v1.4h, v2.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: zip1 v0.4h, v1.2h, v2.2h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: zip1 v0.4s, v1.2s, v2.2s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: zip1 v0.2s, v1.1s, v2.1s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: zip1 v0.2d, v1.1d, v2.1d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: zip1 v0.1d, v1.1d, v2.1d +// CHECK-ERROR: ^ + + zip2 v0.16b, v1.8b, v2.8b + zip2 v0.8b, v1.4b, v2.4b + zip2 v0.8h, v1.4h, v2.4h + zip2 v0.4h, v1.2h, v2.2h + zip2 v0.4s, v1.2s, v2.2s + zip2 v0.2s, v1.1s, v2.1s + zip2 v0.2d, v1.1d, v2.1d + zip2 v0.1d, v1.1d, v2.1d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: zip2 v0.16b, v1.8b, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction 
+// CHECK-ERROR: zip2 v0.8b, v1.4b, v2.4b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: zip2 v0.8h, v1.4h, v2.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: zip2 v0.4h, v1.2h, v2.2h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: zip2 v0.4s, v1.2s, v2.2s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: zip2 v0.2s, v1.1s, v2.1s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: zip2 v0.2d, v1.1d, v2.1d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: zip2 v0.1d, v1.1d, v2.1d +// CHECK-ERROR: ^ + + trn1 v0.16b, v1.8b, v2.8b + trn1 v0.8b, v1.4b, v2.4b + trn1 v0.8h, v1.4h, v2.4h + trn1 v0.4h, v1.2h, v2.2h + trn1 v0.4s, v1.2s, v2.2s + trn1 v0.2s, v1.1s, v2.1s + trn1 v0.2d, v1.1d, v2.1d + trn1 v0.1d, v1.1d, v2.1d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: trn1 v0.16b, v1.8b, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: trn1 v0.8b, v1.4b, v2.4b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: trn1 v0.8h, v1.4h, v2.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: trn1 v0.4h, v1.2h, v2.2h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: trn1 v0.4s, v1.2s, v2.2s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: trn1 v0.2s, v1.1s, v2.1s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: trn1 v0.2d, v1.1d, v2.1d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: trn1 v0.1d, v1.1d, v2.1d 
+// CHECK-ERROR: ^ + + trn2 v0.16b, v1.8b, v2.8b + trn2 v0.8b, v1.4b, v2.4b + trn2 v0.8h, v1.4h, v2.4h + trn2 v0.4h, v1.2h, v2.2h + trn2 v0.4s, v1.2s, v2.2s + trn2 v0.2s, v1.1s, v2.1s + trn2 v0.2d, v1.1d, v2.1d + trn2 v0.1d, v1.1d, v2.1d + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: trn2 v0.16b, v1.8b, v2.8b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: trn2 v0.8b, v1.4b, v2.4b +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: trn2 v0.8h, v1.4h, v2.4h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: trn2 v0.4h, v1.2h, v2.2h +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: trn2 v0.4s, v1.2s, v2.2s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: trn2 v0.2s, v1.1s, v2.1s +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: trn2 v0.2d, v1.1d, v2.1d +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: trn2 v0.1d, v1.1d, v2.1d +// CHECK-ERROR: ^ Index: test/MC/AArch64/neon-perm.s =================================================================== --- /dev/null +++ test/MC/AArch64/neon-perm.s @@ -0,0 +1,103 @@ +// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s + +// Check that the assembler can handle the documented syntax for AArch64 + +//------------------------------------------------------------------------------ +// Instructions for permute +//------------------------------------------------------------------------------ + + uzp1 v0.8b, v1.8b, v2.8b + uzp1 v0.16b, v1.16b, v2.16b + uzp1 v0.4h, v1.4h, v2.4h + uzp1 v0.8h, v1.8h, v2.8h + uzp1 v0.2s, v1.2s, v2.2s + uzp1 v0.4s, v1.4s, v2.4s + uzp1 v0.2d, v1.2d, v2.2d + +// CHECK: uzp1 v0.8b, v1.8b, v2.8b // encoding: 
[0x20,0x18,0x02,0x0e] +// CHECK: uzp1 v0.16b, v1.16b, v2.16b // encoding: [0x20,0x18,0x02,0x4e] +// CHECK: uzp1 v0.4h, v1.4h, v2.4h // encoding: [0x20,0x18,0x42,0x0e] +// CHECK: uzp1 v0.8h, v1.8h, v2.8h // encoding: [0x20,0x18,0x42,0x4e] +// CHECK: uzp1 v0.2s, v1.2s, v2.2s // encoding: [0x20,0x18,0x82,0x0e] +// CHECK: uzp1 v0.4s, v1.4s, v2.4s // encoding: [0x20,0x18,0x82,0x4e] +// CHECK: uzp1 v0.2d, v1.2d, v2.2d // encoding: [0x20,0x18,0xc2,0x4e] + + trn1 v0.8b, v1.8b, v2.8b + trn1 v0.16b, v1.16b, v2.16b + trn1 v0.4h, v1.4h, v2.4h + trn1 v0.8h, v1.8h, v2.8h + trn1 v0.2s, v1.2s, v2.2s + trn1 v0.4s, v1.4s, v2.4s + trn1 v0.2d, v1.2d, v2.2d + +// CHECK: trn1 v0.8b, v1.8b, v2.8b // encoding: [0x20,0x28,0x02,0x0e] +// CHECK: trn1 v0.16b, v1.16b, v2.16b // encoding: [0x20,0x28,0x02,0x4e] +// CHECK: trn1 v0.4h, v1.4h, v2.4h // encoding: [0x20,0x28,0x42,0x0e] +// CHECK: trn1 v0.8h, v1.8h, v2.8h // encoding: [0x20,0x28,0x42,0x4e] +// CHECK: trn1 v0.2s, v1.2s, v2.2s // encoding: [0x20,0x28,0x82,0x0e] +// CHECK: trn1 v0.4s, v1.4s, v2.4s // encoding: [0x20,0x28,0x82,0x4e] +// CHECK: trn1 v0.2d, v1.2d, v2.2d // encoding: [0x20,0x28,0xc2,0x4e] + + zip1 v0.8b, v1.8b, v2.8b + zip1 v0.16b, v1.16b, v2.16b + zip1 v0.4h, v1.4h, v2.4h + zip1 v0.8h, v1.8h, v2.8h + zip1 v0.2s, v1.2s, v2.2s + zip1 v0.4s, v1.4s, v2.4s + zip1 v0.2d, v1.2d, v2.2d + +// CHECK: zip1 v0.8b, v1.8b, v2.8b // encoding: [0x20,0x38,0x02,0x0e] +// CHECK: zip1 v0.16b, v1.16b, v2.16b // encoding: [0x20,0x38,0x02,0x4e] +// CHECK: zip1 v0.4h, v1.4h, v2.4h // encoding: [0x20,0x38,0x42,0x0e] +// CHECK: zip1 v0.8h, v1.8h, v2.8h // encoding: [0x20,0x38,0x42,0x4e] +// CHECK: zip1 v0.2s, v1.2s, v2.2s // encoding: [0x20,0x38,0x82,0x0e] +// CHECK: zip1 v0.4s, v1.4s, v2.4s // encoding: [0x20,0x38,0x82,0x4e] +// CHECK: zip1 v0.2d, v1.2d, v2.2d // encoding: [0x20,0x38,0xc2,0x4e] + + uzp2 v0.8b, v1.8b, v2.8b + uzp2 v0.16b, v1.16b, v2.16b + uzp2 v0.4h, v1.4h, v2.4h + uzp2 v0.8h, v1.8h, v2.8h + uzp2 v0.2s, v1.2s, v2.2s + uzp2 v0.4s, 
v1.4s, v2.4s + uzp2 v0.2d, v1.2d, v2.2d + +// CHECK: uzp2 v0.8b, v1.8b, v2.8b // encoding: [0x20,0x58,0x02,0x0e] +// CHECK: uzp2 v0.16b, v1.16b, v2.16b // encoding: [0x20,0x58,0x02,0x4e] +// CHECK: uzp2 v0.4h, v1.4h, v2.4h // encoding: [0x20,0x58,0x42,0x0e] +// CHECK: uzp2 v0.8h, v1.8h, v2.8h // encoding: [0x20,0x58,0x42,0x4e] +// CHECK: uzp2 v0.2s, v1.2s, v2.2s // encoding: [0x20,0x58,0x82,0x0e] +// CHECK: uzp2 v0.4s, v1.4s, v2.4s // encoding: [0x20,0x58,0x82,0x4e] +// CHECK: uzp2 v0.2d, v1.2d, v2.2d // encoding: [0x20,0x58,0xc2,0x4e] + + trn2 v0.8b, v1.8b, v2.8b + trn2 v0.16b, v1.16b, v2.16b + trn2 v0.4h, v1.4h, v2.4h + trn2 v0.8h, v1.8h, v2.8h + trn2 v0.2s, v1.2s, v2.2s + trn2 v0.4s, v1.4s, v2.4s + trn2 v0.2d, v1.2d, v2.2d + +// CHECK: trn2 v0.8b, v1.8b, v2.8b // encoding: [0x20,0x68,0x02,0x0e] +// CHECK: trn2 v0.16b, v1.16b, v2.16b // encoding: [0x20,0x68,0x02,0x4e] +// CHECK: trn2 v0.4h, v1.4h, v2.4h // encoding: [0x20,0x68,0x42,0x0e] +// CHECK: trn2 v0.8h, v1.8h, v2.8h // encoding: [0x20,0x68,0x42,0x4e] +// CHECK: trn2 v0.2s, v1.2s, v2.2s // encoding: [0x20,0x68,0x82,0x0e] +// CHECK: trn2 v0.4s, v1.4s, v2.4s // encoding: [0x20,0x68,0x82,0x4e] +// CHECK: trn2 v0.2d, v1.2d, v2.2d // encoding: [0x20,0x68,0xc2,0x4e] + + zip2 v0.8b, v1.8b, v2.8b + zip2 v0.16b, v1.16b, v2.16b + zip2 v0.4h, v1.4h, v2.4h + zip2 v0.8h, v1.8h, v2.8h + zip2 v0.2s, v1.2s, v2.2s + zip2 v0.4s, v1.4s, v2.4s + zip2 v0.2d, v1.2d, v2.2d + +// CHECK: zip2 v0.8b, v1.8b, v2.8b // encoding: [0x20,0x78,0x02,0x0e] +// CHECK: zip2 v0.16b, v1.16b, v2.16b // encoding: [0x20,0x78,0x02,0x4e] +// CHECK: zip2 v0.4h, v1.4h, v2.4h // encoding: [0x20,0x78,0x42,0x0e] +// CHECK: zip2 v0.8h, v1.8h, v2.8h // encoding: [0x20,0x78,0x42,0x4e] +// CHECK: zip2 v0.2s, v1.2s, v2.2s // encoding: [0x20,0x78,0x82,0x0e] +// CHECK: zip2 v0.4s, v1.4s, v2.4s // encoding: [0x20,0x78,0x82,0x4e] +// CHECK: zip2 v0.2d, v1.2d, v2.2d // encoding: [0x20,0x78,0xc2,0x4e] Index: test/MC/Disassembler/AArch64/neon-instructions.txt 
=================================================================== --- test/MC/Disassembler/AArch64/neon-instructions.txt +++ test/MC/Disassembler/AArch64/neon-instructions.txt @@ -1793,3 +1793,111 @@ 0x52,0x4a,0x21,0x7e 0x34,0x4a,0x61,0x7e 0xd3,0x49,0xa1,0x7e + +#---------------------------------------------------------------------- +# uzp1: unzip with 3 vectors of the same type, primary result +#---------------------------------------------------------------------- +# CHECK: uzp1 v1.8b, v1.8b, v2.8b +# CHECK: uzp1 v2.16b, v1.16b, v2.16b +# CHECK: uzp1 v3.4h, v1.4h, v2.4h +# CHECK: uzp1 v4.8h, v1.8h, v2.8h +# CHECK: uzp1 v5.2s, v1.2s, v2.2s +# CHECK: uzp1 v6.4s, v1.4s, v2.4s +# CHECK: uzp1 v7.2d, v1.2d, v2.2d +0x21,0x18,0x02,0x0e +0x22,0x18,0x02,0x4e +0x23,0x18,0x42,0x0e +0x24,0x18,0x42,0x4e +0x25,0x18,0x82,0x0e +0x26,0x18,0x82,0x4e +0x27,0x18,0xc2,0x4e + +#---------------------------------------------------------------------- +# trn1: transpose with 3 vectors of the same type, primary result +#---------------------------------------------------------------------- +# CHECK: trn1 v8.8b, v1.8b, v2.8b +# CHECK: trn1 v9.16b, v1.16b, v2.16b +# CHECK: trn1 v10.4h, v1.4h, v2.4h +# CHECK: trn1 v27.8h, v7.8h, v2.8h +# CHECK: trn1 v12.2s, v7.2s, v2.2s +# CHECK: trn1 v29.4s, v6.4s, v2.4s +# CHECK: trn1 v14.2d, v6.2d, v2.2d +0x28,0x28,0x02,0x0e +0x29,0x28,0x02,0x4e +0x2a,0x28,0x42,0x0e +0xfb,0x28,0x42,0x4e +0xec,0x28,0x82,0x0e +0xdd,0x28,0x82,0x4e +0xce,0x28,0xc2,0x4e + +#---------------------------------------------------------------------- +# zip1: zip with 3 vectors of the same type, primary result +#---------------------------------------------------------------------- +# CHECK: zip1 v31.8b, v5.8b, v2.8b +# CHECK: zip1 v0.16b, v5.16b, v2.16b +# CHECK: zip1 v17.4h, v4.4h, v2.4h +# CHECK: zip1 v2.8h, v4.8h, v2.8h +# CHECK: zip1 v19.2s, v3.2s, v2.2s +# CHECK: zip1 v4.4s, v3.4s, v2.4s +# CHECK: zip1 v21.2d, v2.2d, v2.2d +0xbf,0x38,0x02,0x0e +0xa0,0x38,0x02,0x4e +0x91,0x38,0x42,0x0e +0x82,0x38,0x42,0x4e 
+0x73,0x38,0x82,0x0e +0x64,0x38,0x82,0x4e +0x55,0x38,0xc2,0x4e + +#---------------------------------------------------------------------- +# uzp2: unzip with 3 vectors of the same type, secondary result +#---------------------------------------------------------------------- +# CHECK: uzp2 v6.8b, v2.8b, v2.8b +# CHECK: uzp2 v23.16b, v1.16b, v2.16b +# CHECK: uzp2 v8.4h, v1.4h, v2.4h +# CHECK: uzp2 v25.8h, v0.8h, v2.8h +# CHECK: uzp2 v10.2s, v0.2s, v2.2s +# CHECK: uzp2 v27.4s, v7.4s, v2.4s +# CHECK: uzp2 v12.2d, v7.2d, v2.2d +0x46,0x58,0x02,0x0e +0x37,0x58,0x02,0x4e +0x28,0x58,0x42,0x0e +0x19,0x58,0x42,0x4e +0x0a,0x58,0x82,0x0e +0xfb,0x58,0x82,0x4e +0xec,0x58,0xc2,0x4e + +#---------------------------------------------------------------------- +# trn2: transpose with 3 vectors of the same type, secondary result +#---------------------------------------------------------------------- +# CHECK: trn2 v29.8b, v6.8b, v2.8b +# CHECK: trn2 v14.16b, v6.16b, v2.16b +# CHECK: trn2 v31.4h, v5.4h, v2.4h +# CHECK: trn2 v0.8h, v5.8h, v2.8h +# CHECK: trn2 v17.2s, v4.2s, v2.2s +# CHECK: trn2 v2.4s, v4.4s, v2.4s +# CHECK: trn2 v19.2d, v3.2d, v2.2d +0xdd,0x68,0x02,0x0e +0xce,0x68,0x02,0x4e +0xbf,0x68,0x42,0x0e +0xa0,0x68,0x42,0x4e +0x91,0x68,0x82,0x0e +0x82,0x68,0x82,0x4e +0x73,0x68,0xc2,0x4e + +#---------------------------------------------------------------------- +# zip2: zip with 3 vectors of the same type, secondary result +#---------------------------------------------------------------------- +# CHECK: zip2 v4.8b, v3.8b, v2.8b +# CHECK: zip2 v21.16b, v2.16b, v2.16b +# CHECK: zip2 v6.4h, v2.4h, v2.4h +# CHECK: zip2 v23.8h, v1.8h, v2.8h +# CHECK: zip2 v8.2s, v1.2s, v2.2s +# CHECK: zip2 v25.4s, v0.4s, v2.4s +# CHECK: zip2 v10.2d, v0.2d, v2.2d +0x64,0x78,0x02,0x0e +0x55,0x78,0x02,0x4e +0x46,0x78,0x42,0x0e +0x37,0x78,0x42,0x4e +0x28,0x78,0x82,0x0e +0x19,0x78,0x82,0x4e +0x0a,0x78,0xc2,0x4e