diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -17671,6 +17671,75 @@
 The argument to this intrinsic must be a vector.
 
+'``llvm.experimental.vector.deinterleave2``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare {<2 x double>, <2 x double>} @llvm.experimental.vector.deinterleave2.v4f64(<4 x double> %vec1)
+      declare {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %vec1)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.vector.deinterleave2``' intrinsic constructs two
+vectors by deinterleaving the even and odd lanes of the input vector.
+
+This intrinsic works for both fixed and scalable vectors. While this intrinsic
+is marked as experimental, the recommended way to express this operation for
+fixed-width vectors is still to use a shufflevector, as that may allow for more
+optimization opportunities.
+
+For example:
+
+.. code-block:: text
+
+  {<2 x i64>, <2 x i64>} llvm.experimental.vector.deinterleave2.v4i64(<4 x i64> <i64 0, i64 1, i64 2, i64 3>); ==> {<2 x i64> <i64 0, i64 2>, <2 x i64> <i64 1, i64 3>}
+
+Arguments:
+""""""""""
+
+The argument to this intrinsic must be a vector whose length is twice the
+length of each result vector.
+
+'``llvm.experimental.vector.interleave2``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <4 x double> @llvm.experimental.vector.interleave2.v4f64(<2 x double> %vec1, <2 x double> %vec2)
+      declare <vscale x 8 x i32> @llvm.experimental.vector.interleave2.nxv8i32(<vscale x 4 x i32> %vec1, <vscale x 4 x i32> %vec2)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.vector.interleave2``' intrinsic constructs a vector
+by interleaving two input vectors.
+
+This intrinsic works for both fixed and scalable vectors. While this intrinsic
+is marked as experimental, the recommended way to express this operation for
+fixed-width vectors is still to use a shufflevector, as that may allow for more
+optimization opportunities.
+
+For example:
+
+.. code-block:: text
+
+  <4 x i64> llvm.experimental.vector.interleave2.v4i64(<2 x i64> <i64 0, i64 2>, <2 x i64> <i64 1, i64 3>); ==> <4 x i64> <i64 0, i64 1, i64 2, i64 3>
+
+Arguments:
+""""""""""
+The arguments to this intrinsic must be two vectors, each of which is half the
+length of the result vector.
+
 '``llvm.experimental.vector.splice``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -571,6 +571,19 @@
   /// vector, but not the other way around.
   EXTRACT_SUBVECTOR,
 
+  /// VECTOR_DEINTERLEAVE(VEC1, VEC2) - Returns two deinterleaved vectors;
+  /// the even-indexed elements of VEC1 and VEC2 construct the first result
+  /// vector, and the odd-indexed elements construct the second result vector.
+  /// The type of the two result vectors must match the type of VEC1 and VEC2.
+  VECTOR_DEINTERLEAVE,
+
+  /// VECTOR_INTERLEAVE(VEC1, VEC2) - Returns two interleaved vectors;
+  /// the first result vector is constructed by interleaving the low halves
+  /// of VEC1 and VEC2, and the second result vector is constructed by
+  /// interleaving the high halves of VEC1 and VEC2.
+  /// The type of the two result vectors must match the type of VEC1 and VEC2.
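+  /// For example, VECTOR_INTERLEAVE(<a,b,c,d>, <e,f,g,h>) produces
+  /// <a,e,b,f> as the first result and <c,g,d,h> as the second.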
+  VECTOR_INTERLEAVE,
+
   /// VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR,
   /// whose elements are shuffled using the following algorithm:
   ///   RESULT[i] = VECTOR[VECTOR.ElementCount - 1 - i]
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -2116,6 +2116,17 @@
                                                [llvm_anyvector_ty, llvm_i64_ty],
                                                [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<1>>]>;
+
+def int_experimental_vector_interleave2   : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+                                                                  [LLVMHalfElementsVectorType<0>,
+                                                                   LLVMHalfElementsVectorType<0>],
+                                                                  [IntrNoMem]>;
+
+def int_experimental_vector_deinterleave2 : DefaultAttrsIntrinsic<[LLVMHalfElementsVectorType<0>,
+                                                                   LLVMHalfElementsVectorType<0>],
+                                                                  [llvm_anyvector_ty],
+                                                                  [IntrNoMem]>;
+
 //===----------------- Pointer Authentication Intrinsics ------------------===//
 //
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -648,6 +648,8 @@
   void visitVectorReduce(const CallInst &I, unsigned Intrinsic);
   void visitVectorReverse(const CallInst &I);
   void visitVectorSplice(const CallInst &I);
+  void visitVectorInterleave(const CallInst &I);
+  void visitVectorDeinterleave(const CallInst &I);
   void visitStepVector(const CallInst &I);
 
   void visitUserOp1(const Instruction &I) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -29,6 +29,7 @@
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/AssignmentTrackingAnalysis.h"
 #include "llvm/CodeGen/CodeGenCommonISel.h"
@@ -7319,6 +7320,12 @@
   case Intrinsic::experimental_vector_splice:
     visitVectorSplice(I);
     return;
+  case Intrinsic::experimental_vector_interleave2:
+    visitVectorInterleave(I);
+    return;
+  case Intrinsic::experimental_vector_deinterleave2:
+    visitVectorDeinterleave(I);
+    return;
   }
 }
 
@@ -11549,6 +11556,66 @@
   setValue(&I, DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), Mask));
 }
 
+void SelectionDAGBuilder::visitVectorDeinterleave(const CallInst &I) {
+  auto DL = getCurSDLoc();
+  SDValue InVec = getValue(I.getOperand(0));
+  SmallVector<EVT, 2> OutVTs;
+  ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
+                  OutVTs);
+
+  unsigned OutNumElts = OutVTs[0].getVectorMinNumElements();
+
+  // Split the input vector into two parts.
+  SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OutVTs[0], InVec,
+                           DAG.getConstant(0, DL, MVT::i64));
+  SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OutVTs[1], InVec,
+                           DAG.getConstant(OutNumElts, DL, MVT::i64));
+
+  // Use VECTOR_SHUFFLE for fixed-length vectors to benefit from existing
+  // combines that recognize specific deinterleave patterns expressed as a
+  // VECTOR_SHUFFLE.
+  if (!OutVTs[0].isScalableVector()) {
+    SDValue Even = DAG.getVectorShuffle(OutVTs[0], DL, Lo, Hi,
+                                        createStrideMask(0, 2, OutNumElts));
+    SDValue Odd = DAG.getVectorShuffle(OutVTs[1], DL, Lo, Hi,
+                                       createStrideMask(1, 2, OutNumElts));
+    SDValue Res = DAG.getMergeValues({Even, Odd}, getCurSDLoc());
+    setValue(&I, Res);
+    return;
+  }
+
+  SDValue Res = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
+                            DAG.getVTList(OutVTs[0], OutVTs[1]), Lo, Hi);
+  setValue(&I, Res);
+}
+
+void SelectionDAGBuilder::visitVectorInterleave(const CallInst &I) {
+  auto DL = getCurSDLoc();
+  EVT InVT = getValue(I.getOperand(0)).getValueType();
+  SDValue InVec[] = {getValue(I.getOperand(0)), getValue(I.getOperand(1))};
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  EVT OutVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+
+  // Use VECTOR_SHUFFLE for fixed-length vectors to benefit from existing
+  // combines that recognize specific interleave patterns expressed as a
+  // VECTOR_SHUFFLE.
+  if (!OutVT.isScalableVector()) {
+    unsigned NumElts = InVT.getVectorMinNumElements();
+    SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, InVec[0], InVec[1]);
+    setValue(&I, DAG.getVectorShuffle(OutVT, DL, V, DAG.getUNDEF(OutVT),
+                                      createInterleaveMask(NumElts, 2)));
+    return;
+  }
+
+  SDValue Res = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
+                            DAG.getVTList(InVT, InVT), InVec);
+  Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Res.getValue(0),
+                    Res.getValue(1));
+  setValue(&I, Res);
+}
+
 void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) {
   SmallVector<EVT, 4> ValueVTs;
   ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -293,6 +293,8 @@
   case ISD::CONCAT_VECTORS:             return "concat_vectors";
   case ISD::INSERT_SUBVECTOR:           return "insert_subvector";
   case ISD::EXTRACT_SUBVECTOR:          return "extract_subvector";
+  case ISD::VECTOR_DEINTERLEAVE:        return "vector_deinterleave";
+  case ISD::VECTOR_INTERLEAVE:          return "vector_interleave";
   case ISD::SCALAR_TO_VECTOR:           return "scalar_to_vector";
   case ISD::VECTOR_SHUFFLE:             return "vector_shuffle";
   case ISD::VECTOR_SPLICE:              return "vector_splice";
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1047,6 +1047,8 @@
   SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerVECTOR_DEINTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerVECTOR_INTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1208,6 +1208,8 @@
          {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) {
       setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+      setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
+      setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
     }
   }
 
@@ -1253,6 +1255,8 @@
       setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
       setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
       setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
+      setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
+      setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
 
       setOperationAction(ISD::UMUL_LOHI, VT, Expand);
       setOperationAction(ISD::SMUL_LOHI, VT, Expand);
@@ -1394,6 +1398,8 @@
       setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
       setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
       setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
+      setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
+      setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
 
       setOperationAction(ISD::SELECT_CC, VT, Expand);
       setOperationAction(ISD::FREM, VT, Expand);
@@ -6064,6 +6070,10 @@
     return LowerCTTZ(Op, DAG);
   case ISD::VECTOR_SPLICE:
     return LowerVECTOR_SPLICE(Op, DAG);
+  case ISD::VECTOR_DEINTERLEAVE:
+    return LowerVECTOR_DEINTERLEAVE(Op, DAG);
+  case ISD::VECTOR_INTERLEAVE:
+    return LowerVECTOR_INTERLEAVE(Op, DAG);
  case ISD::STRICT_LROUND:
   case ISD::STRICT_LLROUND:
   case ISD::STRICT_LRINT:
@@ -23556,6 +23566,35 @@
   }
 }
 
+SDValue
+AArch64TargetLowering::LowerVECTOR_DEINTERLEAVE(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  EVT OpVT = Op.getValueType();
+  assert(OpVT.isScalableVector() &&
+         "Unexpected fixed length vector in LowerVECTOR_DEINTERLEAVE.");
+
+  SDValue Even = DAG.getNode(AArch64ISD::UZP1, DL, OpVT, Op.getOperand(0),
+                             Op.getOperand(1));
+  SDValue Odd = DAG.getNode(AArch64ISD::UZP2, DL, OpVT, Op.getOperand(0),
+                            Op.getOperand(1));
+  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(OpVT, OpVT), Even,
+                     Odd);
+}
+
+SDValue AArch64TargetLowering::LowerVECTOR_INTERLEAVE(SDValue Op,
+                                                      SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  EVT OpVT = Op.getValueType();
+  assert(OpVT.isScalableVector() &&
+         "Unexpected fixed length vector in LowerVECTOR_INTERLEAVE.");
+
+  SDValue Lo = DAG.getNode(AArch64ISD::ZIP1, DL, OpVT, Op.getOperand(0),
+                           Op.getOperand(1));
+  SDValue Hi = DAG.getNode(AArch64ISD::ZIP2, DL, OpVT, Op.getOperand(0),
+                           Op.getOperand(1));
+  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(OpVT, OpVT), Lo, Hi);
+}
+
 SDValue
 AArch64TargetLowering::LowerFixedLengthFPToIntToSVE(SDValue Op,
                                                     SelectionDAG &DAG) const {
diff --git a/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll b/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll
@@ -0,0 +1,244 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s
+
+define {<2 x half>, <2 x half>} @vector_deinterleave_v2f16_v4f16(<4 x half> %vec) {
+; CHECK-LABEL: vector_deinterleave_v2f16_v4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    dup v1.2s, v0.s[1]
+; CHECK-NEXT:    mov v2.16b, v0.16b
+; CHECK-NEXT:    mov v2.h[1], v1.h[0]
+; CHECK-NEXT:    mov v1.h[0], v0.h[1]
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT:    fmov d0, d2
+; CHECK-NEXT:    ret
+%retval = call {<2 x half>, <2 x half>} @llvm.experimental.vector.deinterleave2.v4f16(<4 x half> %vec)
+ret {<2 x half>, <2 x half>} %retval
+}
+
+define {<4 x half>, <4 x half>} @vector_deinterleave_v4f16_v8f16(<8 x half> %vec) {
+; CHECK-LABEL: vector_deinterleave_v4f16_v8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    uzp1 v2.4h, v0.4h, v1.4h
+; CHECK-NEXT:    uzp2 v1.4h, v0.4h, v1.4h
+; CHECK-NEXT:    fmov d0, d2
+; CHECK-NEXT:    ret
+%retval = call {<4 x half>, <4 x half>} @llvm.experimental.vector.deinterleave2.v8f16(<8 x half> %vec)
+ret {<4 x half>, <4 x half>} %retval
+}
+
+define {<2 x float>, <2 x float>} @vector_deinterleave_v2f32_v4f32(<4 x float> %vec) {
+; CHECK-LABEL: vector_deinterleave_v2f32_v4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    zip1 v2.2s, v0.2s, v1.2s
+; CHECK-NEXT:    zip2 v1.2s, v0.2s, v1.2s
+; CHECK-NEXT:    fmov d0, d2
+; CHECK-NEXT:    ret
+%retval = call {<2 x float>, <2 x float>} @llvm.experimental.vector.deinterleave2.v4f32(<4 x float> %vec)
+ret {<2 x float>, <2 x float>} %retval
+}
+
+define {<8 x half>, <8 x half>} @vector_deinterleave_v8f16_v16f16(<16 x half> %vec) {
+; CHECK-LABEL: vector_deinterleave_v8f16_v16f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 v2.8h, v0.8h, v1.8h
+; CHECK-NEXT:    uzp2 v1.8h, v0.8h, v1.8h
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    ret
+%retval = call {<8 x half>, <8 x half>} @llvm.experimental.vector.deinterleave2.v16f16(<16 x half> %vec)
+ret {<8 x half>, <8 x half>} %retval
+}
+
+define {<4 x float>, <4 x float>} @vector_deinterleave_v4f32_v8f32(<8 x float> %vec) {
+; CHECK-LABEL: vector_deinterleave_v4f32_v8f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 v2.4s, v0.4s, v1.4s
+; CHECK-NEXT:    uzp2 v1.4s, v0.4s, v1.4s
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    ret
+%retval = call {<4 x float>, <4 x float>} @llvm.experimental.vector.deinterleave2.v8f32(<8 x float> %vec)
+ret {<4 x float>, <4 x float>} %retval
+}
+
+define {<2 x double>, <2 x double>} @vector_deinterleave_v2f64_v4f64(<4 x double> %vec) {
+; CHECK-LABEL: vector_deinterleave_v2f64_v4f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 v2.2d, v0.2d, v1.2d
+; CHECK-NEXT:    zip2 v1.2d, v0.2d, v1.2d
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    ret
+%retval = call {<2 x double>, <2 x double>} @llvm.experimental.vector.deinterleave2.v4f64(<4 x double> %vec)
+ret {<2 x double>, <2 x double>} %retval
+}
+
+; Integers
+
+define {<16 x i8>, <16 x i8>} @vector_deinterleave_v16i8_v32i8(<32 x i8> %vec) {
+; CHECK-LABEL: vector_deinterleave_v16i8_v32i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 v2.16b, v0.16b, v1.16b
+; CHECK-NEXT:    uzp2 v1.16b, v0.16b, v1.16b
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    ret
+%retval = call {<16 x i8>, <16 x i8>} @llvm.experimental.vector.deinterleave2.v32i8(<32 x i8> %vec)
+ret {<16 x i8>, <16 x i8>} %retval
+}
+
+define {<8 x i16>, <8 x i16>} @vector_deinterleave_v8i16_v16i16(<16 x i16> %vec) {
+; CHECK-LABEL: vector_deinterleave_v8i16_v16i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 v2.8h, v0.8h, v1.8h
+; CHECK-NEXT:    uzp2 v1.8h, v0.8h, v1.8h
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    ret
+%retval = call {<8 x i16>, <8 x i16>} @llvm.experimental.vector.deinterleave2.v16i16(<16 x i16> %vec)
+ret {<8 x i16>, <8 x i16>} %retval
+}
+
+define {<4 x i32>, <4 x i32>} @vector_deinterleave_v4i32_v8i32(<8 x i32> %vec) {
+; CHECK-LABEL: vector_deinterleave_v4i32_v8i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 v2.4s, v0.4s, v1.4s
+; CHECK-NEXT:    uzp2 v1.4s, v0.4s, v1.4s
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    ret
+%retval = call {<4 x i32>, <4 x i32>} @llvm.experimental.vector.deinterleave2.v8i32(<8 x i32> %vec)
+ret {<4 x i32>, <4 x i32>} %retval
+}
+
+define {<2 x i64>, <2 x i64>} @vector_deinterleave_v2i64_v4i64(<4 x i64> %vec) {
+; CHECK-LABEL: vector_deinterleave_v2i64_v4i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 v2.2d, v0.2d, v1.2d
+; CHECK-NEXT:    zip2 v1.2d, v0.2d, v1.2d
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    ret
+%retval = call {<2 x i64>, <2 x i64>} @llvm.experimental.vector.deinterleave2.v4i64(<4 x i64> %vec)
+ret {<2 x i64>, <2 x i64>} %retval
+}
+
+; Predicated
+define {<16 x i1>, <16 x i1>} @vector_deinterleave_v16i1_v32i1(<32 x i1> %vec) {
+; CHECK-LABEL: vector_deinterleave_v16i1_v32i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr w8, [sp, #64]
+; CHECK-NEXT:    fmov s2, w0
+; CHECK-NEXT:    ldr w9, [sp, #72]
+; CHECK-NEXT:    ldr w10, [sp, #136]
+; CHECK-NEXT:    fmov s1, w8
+; CHECK-NEXT:    ldr w8, [sp, #80]
+; CHECK-NEXT:    mov v2.b[1], w1
+; CHECK-NEXT:    mov v1.b[1], w9
+; CHECK-NEXT:    ldr w9, [sp, #88]
+; CHECK-NEXT:    mov v2.b[2], w2
+; CHECK-NEXT:    mov v1.b[2], w8
+; CHECK-NEXT:    ldr w8, [sp, #96]
+; CHECK-NEXT:    mov v2.b[3], w3
+; CHECK-NEXT:    mov v1.b[3], w9
+; CHECK-NEXT:    ldr w9, [sp, #104]
+; CHECK-NEXT:    mov v2.b[4], w4
+; CHECK-NEXT:    mov v1.b[4], w8
+; CHECK-NEXT:    ldr w8, [sp, #112]
+; CHECK-NEXT:    mov v2.b[5], w5
+; CHECK-NEXT:    mov v1.b[5], w9
+; CHECK-NEXT:    ldr w9, [sp, #120]
+; CHECK-NEXT:    mov v2.b[6], w6
+; CHECK-NEXT:    mov v1.b[6], w8
+; CHECK-NEXT:    ldr w8, [sp, #128]
+; CHECK-NEXT:    mov v2.b[7], w7
+; CHECK-NEXT:    mov v1.b[7], w9
+; CHECK-NEXT:    ldr w9, [sp]
+; CHECK-NEXT:    mov v2.b[8], w9
+; CHECK-NEXT:    ldr w9, [sp, #144]
+; CHECK-NEXT:    mov v1.b[8], w8
+; CHECK-NEXT:    ldr w8, [sp, #8]
+; CHECK-NEXT:    mov v2.b[9], w8
+; CHECK-NEXT:    ldr w8, [sp, #16]
+; CHECK-NEXT:    mov v1.b[9], w10
+; CHECK-NEXT:    ldr w10, [sp, #152]
+; CHECK-NEXT:    mov v2.b[10], w8
+; CHECK-NEXT:    ldr w8, [sp, #24]
+; CHECK-NEXT:    mov v1.b[10], w9
+; CHECK-NEXT:    ldr w9, [sp, #160]
+; CHECK-NEXT:    mov v2.b[11], w8
+; CHECK-NEXT:    ldr w8, [sp, #32]
+; CHECK-NEXT:    mov v1.b[11], w10
+; CHECK-NEXT:    ldr w10, [sp, #168]
+; CHECK-NEXT:    mov v2.b[12], w8
+; CHECK-NEXT:    ldr w8, [sp, #40]
+; CHECK-NEXT:    mov v1.b[12], w9
+; CHECK-NEXT:    ldr w9, [sp, #176]
+; CHECK-NEXT:    mov v2.b[13], w8
+; CHECK-NEXT:    ldr w8, [sp, #48]
+; CHECK-NEXT:    mov v1.b[13], w10
+; CHECK-NEXT:    ldr w10, [sp, #184]
+; CHECK-NEXT:    mov v2.b[14], w8
+; CHECK-NEXT:    ldr w8, [sp, #56]
+; CHECK-NEXT:    mov v1.b[14], w9
+; CHECK-NEXT:    mov v2.b[15], w8
+; CHECK-NEXT:    mov v1.b[15], w10
+; CHECK-NEXT:    uzp1 v0.16b, v2.16b, v1.16b
+; CHECK-NEXT:    uzp2 v1.16b, v2.16b, v1.16b
+; CHECK-NEXT:    ret
+%retval = call {<16 x i1>, <16 x i1>} @llvm.experimental.vector.deinterleave2.v32i1(<32 x i1> %vec)
+ret {<16 x i1>, <16 x i1>} %retval
+}
+
+define {<8 x i1>, <8 x i1>} @vector_deinterleave_v8i1_v16i1(<16 x i1> %vec) {
+; CHECK-LABEL: vector_deinterleave_v8i1_v16i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    uzp1 v2.8b, v0.8b, v1.8b
+; CHECK-NEXT:    uzp2 v1.8b, v0.8b, v1.8b
+; CHECK-NEXT:    fmov d0, d2
+; CHECK-NEXT:    ret
+%retval = call {<8 x i1>, <8 x i1>} @llvm.experimental.vector.deinterleave2.v16i1(<16 x i1> %vec)
+ret {<8 x i1>, <8 x i1>} %retval
+}
+
+define {<4 x i1>, <4 x i1>} @vector_deinterleave_v4i1_v8i1(<8 x i1> %vec) {
+; CHECK-LABEL: vector_deinterleave_v4i1_v8i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 v1.8b, v0.8b, v0.8b
+; CHECK-NEXT:    zip1 v2.8b, v0.8b, v0.8b
+; CHECK-NEXT:    uzp1 v0.4h, v2.4h, v1.4h
+; CHECK-NEXT:    uzp2 v1.4h, v2.4h, v1.4h
+; CHECK-NEXT:    ret
+%retval = call {<4 x i1>, <4 x i1>} @llvm.experimental.vector.deinterleave2.v8i1(<8 x i1> %vec)
+ret {<4 x i1>, <4 x i1>} %retval
+}
+
+define {<2 x i1>, <2 x i1>} @vector_deinterleave_v2i1_v4i1(<4 x i1> %vec) {
+; CHECK-LABEL: vector_deinterleave_v2i1_v4i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ushll v1.4s, v0.4h, #0
+; CHECK-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT:    zip1 v0.2s, v1.2s, v2.2s
+; CHECK-NEXT:    zip2 v1.2s, v1.2s, v2.2s
+; CHECK-NEXT:    ret
+%retval = call {<2 x i1>, <2 x i1>} @llvm.experimental.vector.deinterleave2.v4i1(<4 x i1> %vec)
+ret {<2 x i1>, <2 x i1>} %retval
+}
+
+
+; Floating declarations
+declare {<2 x half>, <2 x half>} @llvm.experimental.vector.deinterleave2.v4f16(<4 x half>)
+declare {<4 x half>, <4 x half>} @llvm.experimental.vector.deinterleave2.v8f16(<8 x half>)
+declare {<2 x float>, <2 x float>} @llvm.experimental.vector.deinterleave2.v4f32(<4 x float>)
+declare {<8 x half>, <8 x half>} @llvm.experimental.vector.deinterleave2.v16f16(<16 x half>)
+declare {<4 x float>, <4 x float>} @llvm.experimental.vector.deinterleave2.v8f32(<8 x float>)
+declare {<2 x double>, <2 x double>} @llvm.experimental.vector.deinterleave2.v4f64(<4 x double>)
+
+; Integer declarations
+declare {<16 x i8>, <16 x i8>} @llvm.experimental.vector.deinterleave2.v32i8(<32 x i8>)
+declare {<8 x i16>, <8 x i16>} @llvm.experimental.vector.deinterleave2.v16i16(<16 x i16>)
+declare {<4 x i32>, <4 x i32>} @llvm.experimental.vector.deinterleave2.v8i32(<8 x i32>)
+declare {<2 x i64>, <2 x i64>} @llvm.experimental.vector.deinterleave2.v4i64(<4 x i64>)
+
+; Predicated declarations
+declare {<16 x i1>, <16 x i1>} @llvm.experimental.vector.deinterleave2.v32i1(<32 x i1>)
+declare {<8 x i1>, <8 x i1>} @llvm.experimental.vector.deinterleave2.v16i1(<16 x i1>)
+declare {<4 x i1>, <4 x i1>} @llvm.experimental.vector.deinterleave2.v8i1(<8 x i1>)
+declare {<2 x i1>, <2 x i1>} @llvm.experimental.vector.deinterleave2.v4i1(<4 x i1>)
diff --git a/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll b/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
@@ -0,0 +1,266 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s
+
+define <4 x half> @interleave2_v4f16(<2 x half> %vec0, <2 x half> %vec1) {
+; CHECK-LABEL: interleave2_v4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    ret
+  %retval = call <4 x half> @llvm.experimental.vector.interleave2.v4f16(<2 x half> %vec0, <2 x half> %vec1)
+  ret <4 x half> %retval
+}
+
+define <8 x half> @interleave2_v8f16(<4 x half> %vec0, <4 x half> %vec1) {
+; CHECK-LABEL: interleave2_v8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI1_0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI1_0]
+; CHECK-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
+; CHECK-NEXT:    ret
+  %retval = call <8 x half> @llvm.experimental.vector.interleave2.v8f16(<4 x half> %vec0, <4 x half> %vec1)
+  ret <8 x half> %retval
+}
+
+define <16 x half> @interleave2_v16f16(<8 x half> %vec0, <8 x half> %vec1) {
+; CHECK-LABEL: interleave2_v16f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 v2.8h, v0.8h, v1.8h
+; CHECK-NEXT:    zip2 v1.8h, v0.8h, v1.8h
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    ret
+  %retval = call <16 x half> @llvm.experimental.vector.interleave2.v16f16(<8 x half> %vec0, <8 x half> %vec1)
+  ret <16 x half> %retval
+}
+
+define <4 x float> @interleave2_v4f32(<2 x float> %vec0, <2 x float> %vec1) {
+; CHECK-LABEL: interleave2_v4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    rev64 v1.4s, v0.4s
+; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+  %retval = call <4 x float> @llvm.experimental.vector.interleave2.v4f32(<2 x float> %vec0, <2 x float> %vec1)
+  ret <4 x float> %retval
+}
+
+define <8 x float> @interleave2_v8f32(<4 x float> %vec0, <4 x float> %vec1) {
+; CHECK-LABEL: interleave2_v8f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 v2.4s, v0.4s, v1.4s
+; CHECK-NEXT:    zip2 v1.4s, v0.4s, v1.4s
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    ret
+  %retval = call <8 x float> @llvm.experimental.vector.interleave2.v8f32(<4 x float> %vec0, <4 x float> %vec1)
+  ret <8 x float> %retval
+}
+
+define <4 x double> @interleave2_v4f64(<2 x double> %vec0, <2 x double> %vec1) {
+; CHECK-LABEL: interleave2_v4f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 v2.2d, v0.2d, v1.2d
+; CHECK-NEXT:    zip2 v1.2d, v0.2d, v1.2d
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    ret
+  %retval = call <4 x double> @llvm.experimental.vector.interleave2.v4f64(<2 x double> %vec0, <2 x double> %vec1)
+  ret <4 x double> %retval
+}
+
+; Integers
+
+define <32 x i8> @interleave2_v32i8(<16 x i8> %vec0, <16 x i8> %vec1) {
+; CHECK-LABEL: interleave2_v32i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 v2.16b, v0.16b, v1.16b
+; CHECK-NEXT:    zip2 v1.16b, v0.16b, v1.16b
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    ret
+  %retval = call <32 x i8> @llvm.experimental.vector.interleave2.v32i8(<16 x i8> %vec0, <16 x i8> %vec1)
+  ret <32 x i8> %retval
+}
+
+define <16 x i16> @interleave2_v16i16(<8 x i16> %vec0, <8 x i16> %vec1) {
+; CHECK-LABEL: interleave2_v16i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 v2.8h, v0.8h, v1.8h
+; CHECK-NEXT:    zip2 v1.8h, v0.8h, v1.8h
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    ret
+  %retval = call <16 x i16> @llvm.experimental.vector.interleave2.v16i16(<8 x i16> %vec0, <8 x i16> %vec1)
+  ret <16 x i16> %retval
+}
+
+define <8 x i32> @interleave2_v8i32(<4 x i32> %vec0, <4 x i32> %vec1) {
+; CHECK-LABEL: interleave2_v8i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 v2.4s, v0.4s, v1.4s
+; CHECK-NEXT:    zip2 v1.4s, v0.4s, v1.4s
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    ret
+  %retval = call <8 x i32> @llvm.experimental.vector.interleave2.v8i32(<4 x i32> %vec0, <4 x i32> %vec1)
+  ret <8 x i32> %retval
+}
+
+define <4 x i64> @interleave2_v4i64(<2 x i64> %vec0, <2 x i64> %vec1) {
+; CHECK-LABEL: interleave2_v4i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 v2.2d, v0.2d, v1.2d
+; CHECK-NEXT:    zip2 v1.2d, v0.2d, v1.2d
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    ret
+  %retval = call <4 x i64> @llvm.experimental.vector.interleave2.v4i64(<2 x i64> %vec0, <2 x i64> %vec1)
+  ret <4 x i64> %retval
+}
+
+; Predicated
+
+define <32 x i1> @interleave2_v32i1(<16 x i1> %vec0, <16 x i1> %vec1) {
+; CHECK-LABEL: interleave2_v32i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 v2.16b, v0.16b, v1.16b
+; CHECK-NEXT:    umov w9, v0.b[0]
+; CHECK-NEXT:    zip2 v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    umov w10, v2.b[1]
+; CHECK-NEXT:    umov w11, v2.b[2]
+; CHECK-NEXT:    umov w12, v2.b[3]
+; CHECK-NEXT:    and w9, w9, #0x1
+; CHECK-NEXT:    umov w13, v2.b[4]
+; CHECK-NEXT:    umov w15, v2.b[6]
+; CHECK-NEXT:    umov w14, v2.b[5]
+; CHECK-NEXT:    umov w16, v2.b[7]
+; CHECK-NEXT:    bfi w9, w10, #1, #1
+; CHECK-NEXT:    umov w10, v2.b[8]
+; CHECK-NEXT:    bfi w9, w11, #2, #1
+; CHECK-NEXT:    umov w11, v2.b[9]
+; CHECK-NEXT:    bfi w9, w12, #3, #1
+; CHECK-NEXT:    and w15, w15, #0x1
+; CHECK-NEXT:    bfi w9, w13, #4, #1
+; CHECK-NEXT:    umov w12, v2.b[10]
+; CHECK-NEXT:    bfi w9, w14, #5, #1
+; CHECK-NEXT:    and w16, w16, #0x1
+; CHECK-NEXT:    orr w9, w9, w15, lsl #6
+; CHECK-NEXT:    and w10, w10, #0x1
+; CHECK-NEXT:    orr w9, w9, w16, lsl #7
+; CHECK-NEXT:    umov w13, v2.b[11]
+; CHECK-NEXT:    and w11, w11, #0x1
+; CHECK-NEXT:    orr w9, w9, w10, lsl #8
+; CHECK-NEXT:    umov w10, v0.b[0]
+; CHECK-NEXT:    umov w16, v0.b[1]
+; CHECK-NEXT:    and w12, w12, #0x1
+; CHECK-NEXT:    orr w9, w9, w11, lsl #9
+; CHECK-NEXT:    umov w11, v0.b[2]
+; CHECK-NEXT:    umov w14, v2.b[12]
+; CHECK-NEXT:    orr w9, w9, w12, lsl #10
+; CHECK-NEXT:    umov w12, v0.b[3]
+; CHECK-NEXT:    and w13, w13, #0x1
+; CHECK-NEXT:    and w10, w10, #0x1
+; CHECK-NEXT:    bfi w10, w16, #1, #1
+; CHECK-NEXT:    umov w15, v2.b[13]
+; CHECK-NEXT:    orr w9, w9, w13, lsl #11
+; CHECK-NEXT:    umov w13, v0.b[4]
+; CHECK-NEXT:    bfi w10, w11, #2, #1
+; CHECK-NEXT:    umov w11, v0.b[6]
+; CHECK-NEXT:    and w14, w14, #0x1
+; CHECK-NEXT:    umov w16, v0.b[5]
+; CHECK-NEXT:    bfi w10, w12, #3, #1
+; CHECK-NEXT:    umov w12, v0.b[7]
+; CHECK-NEXT:    orr w9, w9, w14, lsl #12
+; CHECK-NEXT:    umov w14, v0.b[8]
+; CHECK-NEXT:    bfi w10, w13, #4, #1
+; CHECK-NEXT:    and w13, w15, #0x1
+; CHECK-NEXT:    and w11, w11, #0x1
+; CHECK-NEXT:    umov w15, v0.b[9]
+; CHECK-NEXT:    bfi w10, w16, #5, #1
+; CHECK-NEXT:    and w12, w12, #0x1
+; CHECK-NEXT:    orr w10, w10, w11, lsl #6
+; CHECK-NEXT:    umov w16, v2.b[14]
+; CHECK-NEXT:    orr w10, w10, w12, lsl #7
+; CHECK-NEXT:    and w11, w14, #0x1
+; CHECK-NEXT:    umov w12, v0.b[10]
+; CHECK-NEXT:    orr w9, w9, w13, lsl #13
+; CHECK-NEXT:    and w13, w15, #0x1
+; CHECK-NEXT:    umov w14, v0.b[11]
+; CHECK-NEXT:    orr w10, w10, w11, lsl #8
+; CHECK-NEXT:    umov w11, v0.b[12]
+; CHECK-NEXT:    orr w10, w10, w13, lsl #9
+; CHECK-NEXT:    umov w13, v0.b[13]
+; CHECK-NEXT:    and w15, w16, #0x1
+; CHECK-NEXT:    and w12, w12, #0x1
+; CHECK-NEXT:    umov w16, v0.b[14]
+; CHECK-NEXT:    and w14, w14, #0x1
+; CHECK-NEXT:    orr w10, w10, w12, lsl #10
+; CHECK-NEXT:    and w11, w11, #0x1
+; CHECK-NEXT:    umov w12, v2.b[15]
+; CHECK-NEXT:    orr w10, w10, w14, lsl #11
+; CHECK-NEXT:    and w13, w13, #0x1
+; CHECK-NEXT:    umov w14, v1.b[15]
+; CHECK-NEXT:    orr w10, w10, w11, lsl #12
+; CHECK-NEXT:    and w11, w16, #0x1
+; CHECK-NEXT:    orr w10, w10, w13, lsl #13
+; CHECK-NEXT:    orr w9, w9, w15, lsl #14
+; CHECK-NEXT:    orr w10, w10, w11, lsl #14
+; CHECK-NEXT:    orr w9, w9, w12, lsl #15
+; CHECK-NEXT:    orr w10, w10, w14, lsl #15
+; CHECK-NEXT:    strh w9, [x8]
+; CHECK-NEXT:    strh w10, [x8, #2]
+; CHECK-NEXT:    ret
+  %retval = call <32 x i1> @llvm.experimental.vector.interleave2.v32i1(<16 x i1> %vec0, <16 x i1> %vec1)
+  ret <32 x i1> %retval
+}
+
+define <16 x i1> @interleave2_v16i1(<8 x i1> %vec0, <8 x i1> %vec1) {
+; CHECK-LABEL: interleave2_v16i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI11_0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI11_0]
+; CHECK-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
+; CHECK-NEXT:    ret
+  %retval = call <16 x i1> @llvm.experimental.vector.interleave2.v16i1(<8 x i1> %vec0, <8 x i1> %vec1)
+  ret <16 x i1> %retval
+}
+
+define <8 x i1> @interleave2_v8i1(<4 x i1> %vec0, <4 x i1> %vec1) {
+; CHECK-LABEL: interleave2_v8i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn1 v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ret
+  %retval = call <8 x i1> @llvm.experimental.vector.interleave2.v8i1(<4 x i1> %vec0, <4 x i1> %vec1)
+  ret <8 x i1> %retval
+}
+
+define <4 x i1> @interleave2_v4i1(<2 x i1> %vec0, <2 x i1> %vec1) {
+; CHECK-LABEL: interleave2_v4i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    trn1 v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    ret
+  %retval = call <4 x i1> @llvm.experimental.vector.interleave2.v4i1(<2 x i1> %vec0, <2 x i1> %vec1)
+  ret <4 x i1> %retval
+}
+
+
+; Float declarations
+declare <4 x half> @llvm.experimental.vector.interleave2.v4f16(<2 x half>, <2 x half>)
+declare <8 x half> @llvm.experimental.vector.interleave2.v8f16(<4 x half>, <4 x half>)
+declare <16 x half> @llvm.experimental.vector.interleave2.v16f16(<8 x half>, <8 x half>)
+declare <4 x float> @llvm.experimental.vector.interleave2.v4f32(<2 x float>, <2 x float>)
+declare <8 x float> @llvm.experimental.vector.interleave2.v8f32(<4 x float>, <4 x float>)
+declare <4 x double> @llvm.experimental.vector.interleave2.v4f64(<2 x double>, <2 x double>)
+
+; Integer declarations
+declare <32 x i8> @llvm.experimental.vector.interleave2.v32i8(<16 x i8>, <16 x i8>)
+declare <16 x i16> @llvm.experimental.vector.interleave2.v16i16(<8 x i16>, <8 x i16>)
+declare <8 x i32> @llvm.experimental.vector.interleave2.v8i32(<4 x i32>, <4 x i32>)
+declare <4 x i64> @llvm.experimental.vector.interleave2.v4i64(<2 x i64>, <2 x i64>)
+
+; Predicated
+declare <32 x i1> @llvm.experimental.vector.interleave2.v32i1(<16 x i1>, <16 x i1>)
+declare <16 x i1> @llvm.experimental.vector.interleave2.v16i1(<8 x i1>, <8 x i1>)
+declare <8 x i1> @llvm.experimental.vector.interleave2.v8i1(<4 x i1>, <4 x i1>)
+declare <4 x i1> @llvm.experimental.vector.interleave2.v4i1(<2 x i1>, <2 x i1>)
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll b/llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll
@@ -0,0 +1,186 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+sve2 | FileCheck %s
+
+define {<vscale x 2 x half>, <vscale x 2 x half>} @vector_deinterleave_nxv2f16_nxv4f16(<vscale x 4 x half> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv2f16_nxv4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z1.d, z0.s
+; CHECK-NEXT:    uunpklo z2.d, z0.s
+; CHECK-NEXT:    uzp1 z0.d, z2.d, z1.d
+; CHECK-NEXT:    uzp2 z1.d, z2.d, z1.d
+; CHECK-NEXT:    ret
+%retval = call {<vscale x 2 x half>, <vscale x 2 x half>} @llvm.experimental.vector.deinterleave2.nxv4f16(<vscale x 4 x half> %vec)
+ret {<vscale x 2 x half>, <vscale x 2 x half>} %retval
+}
+
+define {<vscale x 4 x half>, <vscale x 4 x half>} @vector_deinterleave_nxv4f16_nxv8f16(<vscale x 8 x half> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv4f16_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z1.s, z0.h
+; CHECK-NEXT:    uunpklo z2.s, z0.h
+; CHECK-NEXT:    uzp1 z0.s, z2.s, z1.s
+; CHECK-NEXT:    uzp2 z1.s, z2.s, z1.s
+; CHECK-NEXT:    ret
+%retval = call {<vscale x 4 x half>, <vscale x 4 x half>} @llvm.experimental.vector.deinterleave2.nxv8f16(<vscale x 8 x half> %vec)
+ret {<vscale x 4 x half>, <vscale x 4 x half>} %retval
+}
+
+define {<vscale x 2 x float>, <vscale x 2 x float>} @vector_deinterleave_nxv2f32_nxv4f32(<vscale x 4 x float> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv2f32_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z1.d, z0.s
+; CHECK-NEXT:    uunpklo z2.d, z0.s
+; CHECK-NEXT:    uzp1 z0.d, z2.d, z1.d
+; CHECK-NEXT:    uzp2 z1.d, z2.d, z1.d
+; CHECK-NEXT:    ret
+%retval = call {<vscale x 2 x float>, <vscale x 2 x float>} @llvm.experimental.vector.deinterleave2.nxv4f32(<vscale x 4 x float> %vec)
+ret {<vscale x 2 x float>, <vscale x 2 x float>} %retval
+}
+
+define {<vscale x 8 x half>, <vscale x 8 x half>} @vector_deinterleave_nxv8f16_nxv16f16(<vscale x 16 x half> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv8f16_nxv16f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z2.h, z0.h, z1.h
+; CHECK-NEXT:    uzp2 z1.h, z0.h, z1.h
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+%retval = call {<vscale x 8 x half>, <vscale x 8 x half>} @llvm.experimental.vector.deinterleave2.nxv16f16(<vscale x 16 x half> %vec)
+ret {<vscale x 8 x half>, <vscale x 8 x half>} %retval
+}
+
+define {<vscale x 4 x float>, <vscale x 4 x float>} @vector_deinterleave_nxv4f32_nxv8f32(<vscale x 8 x float> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv4f32_nxv8f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z2.s, z0.s, z1.s
+; CHECK-NEXT:    uzp2 z1.s, z0.s, z1.s
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+%retval = call {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.experimental.vector.deinterleave2.nxv8f32(<vscale x 8 x float> %vec)
+ret {<vscale x 4 x float>, <vscale x 4 x float>} %retval
+}
+
+define {<vscale x 2 x double>, <vscale x 2 x double>} @vector_deinterleave_nxv2f64_nxv4f64(<vscale x 4 x double> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv2f64_nxv4f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z2.d, z0.d, z1.d
+; CHECK-NEXT:    uzp2 z1.d, z0.d, z1.d
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+%retval = call {<vscale x 2 x double>, <vscale x 2 x double>} @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %vec)
+ret {<vscale x 2 x double>, <vscale x 2 x double>} %retval
+}
+
+; Integers
+
+define {<vscale x 16 x i8>, <vscale x 16 x i8>} @vector_deinterleave_nxv16i8_nxv32i8(<vscale x 32 x i8> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv16i8_nxv32i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z2.b, z0.b, z1.b
+; CHECK-NEXT:    uzp2 z1.b, z0.b, z1.b
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+%retval = call {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.experimental.vector.deinterleave2.nxv32i8(<vscale x 32 x i8> %vec)
+ret {<vscale x 16 x i8>, <vscale x 16 x i8>} %retval
+}
+
+define {<vscale x 8 x i16>, <vscale x 8 x i16>} @vector_deinterleave_nxv8i16_nxv16i16(<vscale x 16 x i16> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv8i16_nxv16i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z2.h, z0.h, z1.h
+; CHECK-NEXT:    uzp2 z1.h, z0.h, z1.h
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+%retval = call {<vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.experimental.vector.deinterleave2.nxv16i16(<vscale x 16 x i16> %vec)
+ret {<vscale x 8 x i16>, <vscale x 8 x i16>} %retval
+}
+
+define {<vscale x 4 x i32>, <vscale x 4 x i32>} @vector_deinterleave_nxv4i32_nxv8i32(<vscale x 8 x i32> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv4i32_nxv8i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z2.s, z0.s, z1.s
+; CHECK-NEXT:    uzp2 z1.s, z0.s, z1.s
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+%retval = call {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %vec)
+ret {<vscale x 4 x i32>, <vscale x 4 x i32>} %retval
+}
+
+define {<vscale x 2 x i64>, <vscale x 2 x i64>} @vector_deinterleave_nxv2i64_nxv4i64(<vscale x 4 x i64> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv2i64_nxv4i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z2.d, z0.d, z1.d
+; CHECK-NEXT:    uzp2 z1.d, z0.d, z1.d
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+%retval = call {<vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.experimental.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> %vec)
+ret {<vscale x 2 x i64>, <vscale x 2 x i64>} %retval
+}
+
+; Predicated
+define {<vscale x 16 x i1>, <vscale x 16 x i1>} @vector_deinterleave_nxv16i1_nxv32i1(<vscale x 32 x i1> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv16i1_nxv32i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 p2.b, p0.b, p1.b
+; CHECK-NEXT:    uzp2 p1.b, p0.b, p1.b
+; CHECK-NEXT:    mov p0.b, p2.b
+; CHECK-NEXT:    ret
+%retval = call {<vscale x 16 x i1>, <vscale x 16 x i1>} @llvm.experimental.vector.deinterleave2.nxv32i1(<vscale x 32 x i1> %vec)
+ret {<vscale x 16 x i1>, <vscale x 16 x i1>} %retval
+}
+
+define {<vscale x 8 x i1>, <vscale x 8 x i1>} @vector_deinterleave_nxv8i1_nxv16i1(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv8i1_nxv16i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p1.h, p0.b
+; CHECK-NEXT:    punpklo p2.h, p0.b
+; CHECK-NEXT:    uzp1 p0.h, p2.h, p1.h
+; CHECK-NEXT:    uzp2 p1.h, p2.h, p1.h
+; CHECK-NEXT:    ret
+%retval = call {<vscale x 8 x i1>, <vscale x 8 x i1>} @llvm.experimental.vector.deinterleave2.nxv16i1(<vscale x 16 x i1> %vec)
+ret {<vscale x 8 x i1>, <vscale x 8 x i1>} %retval
+}
+
+define {<vscale x 4 x i1>, <vscale x 4 x i1>} @vector_deinterleave_nxv4i1_nxv8i1(<vscale x 8 x i1> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv4i1_nxv8i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p1.h, p0.b
+; CHECK-NEXT:    punpklo p2.h, p0.b
+; CHECK-NEXT:    uzp1 p0.s, p2.s, p1.s
+; CHECK-NEXT:    uzp2 p1.s, p2.s, p1.s
+; CHECK-NEXT:    ret
+%retval = call {<vscale x 4 x i1>, <vscale x 4 x i1>} @llvm.experimental.vector.deinterleave2.nxv8i1(<vscale x 8 x i1> %vec)
+ret {<vscale x 4 x i1>, <vscale x 4 x i1>} %retval
+}
+
+define {<vscale x 2 x i1>, <vscale x 2 x i1>} @vector_deinterleave_nxv2i1_nxv4i1(<vscale x 4 x i1> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv2i1_nxv4i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p1.h, p0.b
+; CHECK-NEXT:    punpklo p2.h, p0.b
+; CHECK-NEXT:    uzp1 p0.d, p2.d, p1.d
+; CHECK-NEXT:    uzp2 p1.d, p2.d, p1.d
+; CHECK-NEXT:    ret
+%retval = call {<vscale x 2 x i1>, <vscale x 2 x i1>} @llvm.experimental.vector.deinterleave2.nxv4i1(<vscale x 4 x i1> %vec)
+ret {<vscale x 2 x i1>, <vscale x 2 x i1>} %retval
+}
+
+
+; Floating declarations
+declare {<vscale x 2 x half>, <vscale x 2 x half>} @llvm.experimental.vector.deinterleave2.nxv4f16(<vscale x 4 x half>)
+declare {<vscale x 4 x half>, <vscale x 4 x half>} @llvm.experimental.vector.deinterleave2.nxv8f16(<vscale x 8 x half>)
+declare {<vscale x 2 x float>, <vscale x 2 x float>} @llvm.experimental.vector.deinterleave2.nxv4f32(<vscale x 4 x float>)
+declare {<vscale x 8 x half>, <vscale x 8 x half>} @llvm.experimental.vector.deinterleave2.nxv16f16(<vscale x 16 x half>)
+declare {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.experimental.vector.deinterleave2.nxv8f32(<vscale x 8 x float>)
+declare {<vscale x 2 x double>, <vscale x 2 x double>} @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double>)
+
+; Integer declarations
+declare {<vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.experimental.vector.deinterleave2.nxv32i8(<vscale x 32 x i8>)
+declare {<vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.experimental.vector.deinterleave2.nxv16i16(<vscale x 16 x i16>)
+declare {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32>)
+declare {<vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.experimental.vector.deinterleave2.nxv4i64(<vscale x 4 x i64>)
+
+; Predicated declarations
+declare {<vscale x 16 x i1>, <vscale x 16 x i1>} @llvm.experimental.vector.deinterleave2.nxv32i1(<vscale x 32 x i1>)
+declare {<vscale x 8 x i1>, <vscale x 8 x i1>} @llvm.experimental.vector.deinterleave2.nxv16i1(<vscale x 16 x i1>)
+declare {<vscale x 4 x i1>, <vscale x 4 x i1>} @llvm.experimental.vector.deinterleave2.nxv8i1(<vscale x 8 x i1>)
+declare {<vscale x 2 x i1>, <vscale x 2 x i1>} @llvm.experimental.vector.deinterleave2.nxv4i1(<vscale x 4 x i1>)
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll b/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll
@@ -0,0 +1,181 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+sve | FileCheck %s
+
+define <vscale x 4 x half> @interleave2_nxv4f16(<vscale x 2 x half> %vec0, <vscale x 2 x half> %vec1) {
+; CHECK-LABEL: interleave2_nxv4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 z2.d, z0.d, z1.d
+; CHECK-NEXT:    zip1 z0.d, z0.d, z1.d
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z2.s
+; CHECK-NEXT:    ret
+  %retval = call <vscale x 4 x half> @llvm.experimental.vector.interleave2.nxv4f16(<vscale x 2 x half> %vec0, <vscale x 2 x half> %vec1)
+  ret <vscale x 4 x half> %retval
+}
+
+define <vscale x 8 x half> @interleave2_nxv8f16(<vscale x 4 x half> %vec0, <vscale x 4 x half> %vec1) {
+; CHECK-LABEL: interleave2_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 z2.s, z0.s, z1.s
+; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z2.h
+; CHECK-NEXT:    ret
+  %retval = call <vscale x 8 x half> @llvm.experimental.vector.interleave2.nxv8f16(<vscale x 4 x half> %vec0, <vscale x 4 x half> %vec1)
+  ret <vscale x 8 x half> %retval
+}
+
+define <vscale x 16 x half> @interleave2_nxv16f16(<vscale x 8 x half> %vec0, <vscale x 8 x half> %vec1) {
+; CHECK-LABEL: interleave2_nxv16f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z2.h, z0.h, z1.h
+; CHECK-NEXT:    zip2 z1.h, z0.h, z1.h
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+  %retval = call <vscale x 16 x half> @llvm.experimental.vector.interleave2.nxv16f16(<vscale x 8 x half> %vec0, <vscale x 8 x half> %vec1)
+  ret <vscale x 16 x half> %retval
+}
+
+define <vscale x 4 x float> @interleave2_nxv4f32(<vscale x 2 x float> %vec0, <vscale x 2 x float> %vec1) {
+; CHECK-LABEL: interleave2_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 z2.d, z0.d, z1.d
+; CHECK-NEXT:    zip1 z0.d, z0.d, z1.d
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z2.s
+; CHECK-NEXT:    ret
+  %retval = call <vscale x 4 x float> @llvm.experimental.vector.interleave2.nxv4f32(<vscale x 2 x float> %vec0, <vscale x 2 x float> %vec1)
+  ret <vscale x 4 x float> %retval
+}
+
+define <vscale x 8 x float> @interleave2_nxv8f32(<vscale x 4 x float> %vec0, <vscale x 4 x float> %vec1) {
+; CHECK-LABEL: interleave2_nxv8f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z2.s, z0.s, z1.s
+; CHECK-NEXT:    zip2 z1.s, z0.s, z1.s
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+  %retval = call <vscale x 8 x float> @llvm.experimental.vector.interleave2.nxv8f32(<vscale x 4 x float> %vec0, <vscale x 4 x float> %vec1)
+  ret <vscale x 8 x float> %retval
+}
+
+define <vscale x 4 x double> @interleave2_nxv4f64(<vscale x 2 x double> %vec0, <vscale x 2 x double> %vec1) {
+; CHECK-LABEL: interleave2_nxv4f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z2.d, z0.d, z1.d
+; CHECK-NEXT:    zip2 z1.d, z0.d, z1.d
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+  %retval = call <vscale x 4 x double> @llvm.experimental.vector.interleave2.nxv4f64(<vscale x 2 x double> %vec0, <vscale x 2 x double> %vec1)
+  ret <vscale x 4 x double> %retval
+}
+
+; Integers
+
+define <vscale x 32 x i8> @interleave2_nxv32i8(<vscale x 16 x i8> %vec0, <vscale x 16 x i8> %vec1) {
+; CHECK-LABEL: interleave2_nxv32i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z2.b, z0.b, z1.b
+; CHECK-NEXT:    zip2 z1.b, z0.b, z1.b
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+  %retval = call <vscale x 32 x i8> @llvm.experimental.vector.interleave2.nxv32i8(<vscale x 16 x i8> %vec0, <vscale x 16 x i8> %vec1)
+  ret <vscale x 32 x i8> %retval
+}
+
+define <vscale x 16 x i16> @interleave2_nxv16i16(<vscale x 8 x i16> %vec0, <vscale x 8 x i16> %vec1) {
+; CHECK-LABEL: interleave2_nxv16i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z2.h, z0.h, z1.h
+; CHECK-NEXT:    zip2 z1.h, z0.h, z1.h
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+  %retval = call <vscale x 16 x i16> @llvm.experimental.vector.interleave2.nxv16i16(<vscale x 8 x i16> %vec0, <vscale x 8 x i16> %vec1)
+  ret <vscale x 16 x i16> %retval
+}
+
+define <vscale x 8 x i32> @interleave2_nxv8i32(<vscale x 4 x i32> %vec0, <vscale x 4 x i32> %vec1) {
+; CHECK-LABEL: interleave2_nxv8i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z2.s, z0.s, z1.s
+; CHECK-NEXT:    zip2 z1.s, z0.s, z1.s
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+  %retval = call <vscale x 8 x i32> @llvm.experimental.vector.interleave2.nxv8i32(<vscale x 4 x i32> %vec0, <vscale x 4 x i32> %vec1)
+  ret <vscale x 8 x i32> %retval
+}
+
+define <vscale x 4 x i64> @interleave2_nxv4i64(<vscale x 2 x i64> %vec0, <vscale x 2 x i64> %vec1) {
+; CHECK-LABEL: interleave2_nxv4i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z2.d, z0.d, z1.d
+; CHECK-NEXT:    zip2 z1.d, z0.d, z1.d
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+  %retval = call <vscale x 4 x i64> @llvm.experimental.vector.interleave2.nxv4i64(<vscale x 2 x i64> %vec0, <vscale x 2 x i64> %vec1)
+  ret <vscale x 4 x i64> %retval
+}
+
+; Predicated
+
+define <vscale x 32 x i1> @interleave2_nxv32i1(<vscale x 16 x i1> %vec0, <vscale x 16 x i1> %vec1) {
+; CHECK-LABEL: interleave2_nxv32i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 p2.b, p0.b, p1.b
+; CHECK-NEXT:    zip2 p1.b, p0.b, p1.b
+; CHECK-NEXT:    mov p0.b, p2.b
+; CHECK-NEXT:    ret
+  %retval = call <vscale x 32 x i1> @llvm.experimental.vector.interleave2.nxv32i1(<vscale x 16 x i1> %vec0, <vscale x 16 x i1> %vec1)
+  ret <vscale x 32 x i1> %retval
+}
+
+define <vscale x 16 x i1> @interleave2_nxv16i1(<vscale x 8 x i1> %vec0, <vscale x 8 x i1> %vec1) {
+; CHECK-LABEL: interleave2_nxv16i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 p2.h, p0.h, p1.h
+; CHECK-NEXT:    zip1 p0.h, p0.h, p1.h
+; CHECK-NEXT:    uzp1 p0.b, p0.b, p2.b
+; CHECK-NEXT:    ret
+  %retval = call <vscale x 16 x i1> @llvm.experimental.vector.interleave2.nxv16i1(<vscale x 8 x i1> %vec0, <vscale x 8 x i1> %vec1)
+  ret <vscale x 16 x i1> %retval
+}
+
+define <vscale x 8 x i1> @interleave2_nxv8i1(<vscale x 4 x i1> %vec0, <vscale x 4 x i1> %vec1) {
+; CHECK-LABEL: interleave2_nxv8i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 p2.s, p0.s, p1.s
+; CHECK-NEXT:    zip1 p0.s, p0.s, p1.s
+; CHECK-NEXT:    uzp1 p0.h, p0.h, p2.h
+; CHECK-NEXT:    ret
+  %retval = call <vscale x 8 x i1> @llvm.experimental.vector.interleave2.nxv8i1(<vscale x 4 x i1> %vec0, <vscale x 4 x i1> %vec1)
+  ret <vscale x 8 x i1> %retval
+}
+
+define <vscale x 4 x i1> @interleave2_nxv4i1(<vscale x 2 x i1> %vec0, <vscale x 2 x i1> %vec1) {
+; CHECK-LABEL: interleave2_nxv4i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 p2.d, p0.d, p1.d
+; CHECK-NEXT:    zip1 p0.d, p0.d, p1.d
+; CHECK-NEXT:    uzp1 p0.s, p0.s, p2.s
+; CHECK-NEXT:    ret
+  %retval = call <vscale x 4 x i1> @llvm.experimental.vector.interleave2.nxv4i1(<vscale x 2 x i1> %vec0, <vscale x 2 x i1> %vec1)
+  ret <vscale x 4 x i1> %retval
+}
+
+
+; Float declarations
+declare <vscale x 4 x half> @llvm.experimental.vector.interleave2.nxv4f16(<vscale x 2 x half>, <vscale x 2 x half>)
+declare <vscale x 8 x half> @llvm.experimental.vector.interleave2.nxv8f16(<vscale x 4 x half>, <vscale x 4 x half>)
+declare <vscale x 16 x half> @llvm.experimental.vector.interleave2.nxv16f16(<vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.experimental.vector.interleave2.nxv4f32(<vscale x 2 x float>, <vscale x 2 x float>)
+declare <vscale x 8 x float> @llvm.experimental.vector.interleave2.nxv8f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 4 x double> @llvm.experimental.vector.interleave2.nxv4f64(<vscale x 2 x double>, <vscale x 2 x double>)
+
+; Integer declarations
+declare <vscale x 32 x i8> @llvm.experimental.vector.interleave2.nxv32i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 16 x i16> @llvm.experimental.vector.interleave2.nxv16i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 8 x i32> @llvm.experimental.vector.interleave2.nxv8i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i64> @llvm.experimental.vector.interleave2.nxv4i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+
+; Predicated
+declare <vscale x 32 x i1> @llvm.experimental.vector.interleave2.nxv32i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.experimental.vector.interleave2.nxv16i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
+declare <vscale x 8 x i1> @llvm.experimental.vector.interleave2.nxv8i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
+declare <vscale x 4 x i1> @llvm.experimental.vector.interleave2.nxv4i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
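
; A minimal usage sketch (illustrative only, not part of the patch): calling
; deinterleave2 and then re-interleaving the two halves reconstructs the
; original vector. The function name @deinterleave_roundtrip is hypothetical.
define <vscale x 8 x i32> @deinterleave_roundtrip(<vscale x 8 x i32> %v) {
  %pair = call {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %v)
  %even = extractvalue {<vscale x 4 x i32>, <vscale x 4 x i32>} %pair, 0
  %odd = extractvalue {<vscale x 4 x i32>, <vscale x 4 x i32>} %pair, 1
  %res = call <vscale x 8 x i32> @llvm.experimental.vector.interleave2.nxv8i32(<vscale x 4 x i32> %even, <vscale x 4 x i32> %odd)
  ret <vscale x 8 x i32> %res
}

declare {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32>)
declare <vscale x 8 x i32> @llvm.experimental.vector.interleave2.nxv8i32(<vscale x 4 x i32>, <vscale x 4 x i32>)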