diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -17671,6 +17671,79 @@
 The argument to this intrinsic must be a vector.
 
+'``llvm.experimental.vector.deinterleave.even/odd``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <2 x double> @llvm.experimental.vector.deinterleave.even.v2f64(<4 x double> %vec1)
+      declare <vscale x 2 x i32> @llvm.experimental.vector.deinterleave.even.nxv4i32(<vscale x 4 x i32> %vec1)
+
+      declare <2 x double> @llvm.experimental.vector.deinterleave.odd.v2f64(<4 x double> %vec1)
+      declare <vscale x 2 x i32> @llvm.experimental.vector.deinterleave.odd.nxv4i32(<vscale x 4 x i32> %vec1)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.vector.deinterleave.even/odd``' intrinsics construct a
+vector from the even or odd lanes of the input vector.
+
+These intrinsics work for both fixed and scalable vectors. While the intrinsics
+are marked as experimental, the recommended way to express this operation for
+fixed-width vectors is still to use a shufflevector, as that may allow for more
+optimization opportunities.
+
+For example:
+
+.. code-block:: text
+
+  <2 x i64> llvm.experimental.vector.deinterleave.even.v2i64(<4 x i64> <i64 0, i64 1, i64 2, i64 3>); ==> <2 x i64> <i64 0, i64 2>
+  <2 x i64> llvm.experimental.vector.deinterleave.odd.v2i64(<4 x i64> <i64 0, i64 1, i64 2, i64 3>); ==> <2 x i64> <i64 1, i64 3>
+
+Arguments:
+""""""""""
+
+The argument to this intrinsic must be a vector with twice as many elements as
+the result vector.
+
+'``llvm.experimental.vector.interleave``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <4 x double> @llvm.experimental.vector.interleave.v2f64(<2 x double> %vec1, <2 x double> %vec2)
+      declare <vscale x 4 x i32> @llvm.experimental.vector.interleave.nxv4i32(<vscale x 2 x i32> %vec1, <vscale x 2 x i32> %vec2)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.vector.interleave``' intrinsic constructs a vector
+by interleaving the lanes of two input vectors.
+
+This intrinsic works for both fixed and scalable vectors. While the intrinsic
+is marked as experimental, the recommended way to express this operation for
+fixed-width vectors is still to use a shufflevector, as that may allow for more
+optimization opportunities.
+
+For example:
+
+.. code-block:: text
+
+  <4 x i64> llvm.experimental.vector.interleave.v2i64(<2 x i64> <i64 0, i64 2>, <2 x i64> <i64 1, i64 3>); ==> <4 x i64> <i64 0, i64 1, i64 2, i64 3>
+
+Arguments:
+""""""""""
+
+The arguments to this intrinsic must be two vectors, each with half as many
+elements as the result vector.
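+
+For example, deinterleaving the result of an interleave returns the original
+operands (an illustrative sketch that follows directly from the definitions
+above; the overload suffixes follow the convention used in the declarations
+above):
+
+.. code-block:: llvm
+
+      %v = call <4 x i32> @llvm.experimental.vector.interleave.v2i32(<2 x i32> %a, <2 x i32> %b)
+      ; %even is equal to %a and %odd is equal to %b
+      %even = call <2 x i32> @llvm.experimental.vector.deinterleave.even.v2i32(<4 x i32> %v)
+      %odd = call <2 x i32> @llvm.experimental.vector.deinterleave.odd.v2i32(<4 x i32> %v)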
+
 '``llvm.experimental.vector.splice``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -571,6 +571,19 @@
   /// vector, but not the other way around.
   EXTRACT_SUBVECTOR,
 
+  /// VECTOR_DEINTERLEAVE(VEC1, VEC2, IDX) - Returns one deinterleaved vector
+  /// drawn from VEC1 and VEC2. The concatenation of VEC1 and VEC2 is
+  /// deinterleaved with a stride of 2, starting at IDX, which must be a
+  /// constant value in the range [0, 1].
+  /// The result vector type must match the type of VEC1 and VEC2.
+  VECTOR_DEINTERLEAVE,
+
+  /// VECTOR_INTERLEAVE(VEC1, VEC2) - Returns two interleaved vectors;
+  /// the first result vector is constructed by interleaving the low halves
+  /// of VEC1 and VEC2, and the second result vector by interleaving the
+  /// high halves of VEC1 and VEC2.
+  /// The type of the two result vectors must match the type of VEC1 and VEC2.
+  VECTOR_INTERLEAVE,
+
   /// VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR,
   /// whose elements are shuffled using the following algorithm:
   ///   RESULT[i] = VECTOR[VECTOR.ElementCount - 1 - i]
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -2116,6 +2116,20 @@
                                                  [llvm_anyvector_ty, llvm_i64_ty],
                                                  [IntrNoMem, IntrSpeculatable,
                                                   ImmArg<ArgIndex<1>>]>;
+
+def int_experimental_vector_interleave : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+                                                               [LLVMHalfElementsVectorType<0>,
+                                                                LLVMHalfElementsVectorType<0>],
+                                                               [IntrNoMem]>;
+
+def int_experimental_vector_deinterleave_even : DefaultAttrsIntrinsic<[LLVMHalfElementsVectorType<0>],
+                                                                      [llvm_anyvector_ty],
+                                                                      [IntrNoMem]>;
+
+def int_experimental_vector_deinterleave_odd : DefaultAttrsIntrinsic<[LLVMHalfElementsVectorType<0>],
+                                                                     [llvm_anyvector_ty],
+                                                                     [IntrNoMem]>;
+
 //===----------------- Pointer Authentication Intrinsics ------------------===//
 //
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -703,6 +703,10 @@
 def scalar_to_vector : SDNode<"ISD::SCALAR_TO_VECTOR", SDTypeProfile<1, 1, []>,
                               []>;
 
+def vector_deinterleave : SDNode<"ISD::VECTOR_DEINTERLEAVE", SDTypeProfile<1, 3,
+                                 [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
+                                  SDTCisInt<3>]>, []>;
+
 // vector_extract/vector_insert are deprecated. extractelt/insertelt
 // are preferred.
 def vector_extract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -648,6 +648,8 @@
   void visitVectorReduce(const CallInst &I, unsigned Intrinsic);
   void visitVectorReverse(const CallInst &I);
   void visitVectorSplice(const CallInst &I);
+  void visitVectorInterleave(const CallInst &I);
+  void visitVectorDeinterleave(const CallInst &I, unsigned Idx);
   void visitStepVector(const CallInst &I);
 
   void visitUserOp1(const Instruction &I) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -29,6 +29,7 @@
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/AssignmentTrackingAnalysis.h"
 #include "llvm/CodeGen/CodeGenCommonISel.h"
@@ -7319,6 +7320,15 @@
   case Intrinsic::experimental_vector_splice:
     visitVectorSplice(I);
     return;
+  case Intrinsic::experimental_vector_interleave:
+    visitVectorInterleave(I);
+    return;
+  case Intrinsic::experimental_vector_deinterleave_even:
+    visitVectorDeinterleave(I, 0);
+    return;
+  case Intrinsic::experimental_vector_deinterleave_odd:
+    visitVectorDeinterleave(I, 1);
+    return;
   }
 }
 
@@ -11549,6 +11559,62 @@
   setValue(&I, DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), Mask));
 }
 
+void SelectionDAGBuilder::visitVectorDeinterleave(const CallInst &I,
+                                                  unsigned Idx) {
+  auto DL = getCurSDLoc();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  SDValue InVec = getValue(I.getOperand(0));
+  EVT OutVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+  unsigned OutNumElts = OutVT.getVectorMinNumElements();
+
+  // Split the input vector into two parts.
+  SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OutVT, InVec,
+                           DAG.getConstant(0, DL, MVT::i64));
+  SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OutVT, InVec,
+                           DAG.getConstant(OutNumElts, DL, MVT::i64));
+
+  // Use VECTOR_SHUFFLE for fixed-length vectors to benefit from existing
+  // combines that recognize specific deinterleave patterns using
+  // VECTOR_SHUFFLE.
+  if (!OutVT.isScalableVector()) {
+    SDValue Res = DAG.getVectorShuffle(OutVT, DL, Lo, Hi,
+                                       createStrideMask(Idx, 2, OutNumElts));
+    setValue(&I, Res);
+    return;
+  }
+
+  SDValue Res = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, OutVT, Lo, Hi,
+                            DAG.getConstant(Idx, DL, MVT::i64));
+  setValue(&I, Res);
+}
+
+void SelectionDAGBuilder::visitVectorInterleave(const CallInst &I) {
+  auto DL = getCurSDLoc();
+  EVT InVT = getValue(I.getOperand(0)).getValueType();
+  SDValue InVec[] = {getValue(I.getOperand(0)), getValue(I.getOperand(1))};
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  EVT OutVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+
+  // Use VECTOR_SHUFFLE for fixed-length vectors to benefit from existing
+  // combines that recognize specific interleave patterns using
+  // VECTOR_SHUFFLE.
+  if (!OutVT.isScalableVector()) {
+    unsigned NumElts = InVT.getVectorMinNumElements();
+    SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, InVec[0], InVec[1]);
+    setValue(&I, DAG.getVectorShuffle(OutVT, DL, V, DAG.getUNDEF(OutVT),
+                                      createInterleaveMask(NumElts, 2)));
+    return;
+  }
+
+  SDValue Res = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, {InVT, InVT}, InVec);
+  Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Res.getValue(0),
+                    Res.getValue(1));
+  setValue(&I, Res);
+}
+
 void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) {
   SmallVector<EVT, 4> ValueVTs;
   ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -293,6 +293,8 @@
   case ISD::CONCAT_VECTORS:             return "concat_vectors";
   case ISD::INSERT_SUBVECTOR:           return "insert_subvector";
   case ISD::EXTRACT_SUBVECTOR:          return "extract_subvector";
+  case ISD::VECTOR_DEINTERLEAVE:        return "vector_deinterleave";
+  case ISD::VECTOR_INTERLEAVE:          return "vector_interleave";
   case ISD::SCALAR_TO_VECTOR:           return "scalar_to_vector";
   case ISD::VECTOR_SHUFFLE:             return "vector_shuffle";
   case ISD::VECTOR_SPLICE:              return "vector_splice";
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1045,6 +1045,8 @@
   SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerVECTOR_DEINTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerVECTOR_INTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
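
As an IR-level sketch of the fixed-width paths above (illustrative only: the
builder works on SelectionDAG nodes, and %lo/%hi stand for the two extracted
subvector halves), createStrideMask(Idx, 2, 2) and createInterleaveMask(2, 2)
correspond to the following shufflevector masks:

  ; Deinterleave of a <4 x i64> input split into %lo and %hi:
  %even = shufflevector <2 x i64> %lo, <2 x i64> %hi, <2 x i32> <i32 0, i32 2>  ; Idx = 0
  %odd  = shufflevector <2 x i64> %lo, <2 x i64> %hi, <2 x i32> <i32 1, i32 3>  ; Idx = 1
  ; Interleave of %a and %b: concatenate, then apply the mask <0, 2, 1, 3>:
  %cc = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %il = shufflevector <4 x i64> %cc, <4 x i64> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>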
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1188,6 +1188,8 @@
          {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) {
       setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+      setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
+      setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
     }
   }
 
@@ -1233,6 +1235,8 @@
       setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
       setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
       setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
+      setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
+      setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
       setOperationAction(ISD::UMUL_LOHI, VT, Expand);
       setOperationAction(ISD::SMUL_LOHI, VT, Expand);
@@ -1374,6 +1378,8 @@
       setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
       setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
       setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
+      setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
+      setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
       setOperationAction(ISD::SELECT_CC, VT, Expand);
       setOperationAction(ISD::FREM, VT, Expand);
@@ -6044,6 +6050,10 @@
     return LowerCTTZ(Op, DAG);
   case ISD::VECTOR_SPLICE:
     return LowerVECTOR_SPLICE(Op, DAG);
+  case ISD::VECTOR_DEINTERLEAVE:
+    return LowerVECTOR_DEINTERLEAVE(Op, DAG);
+  case ISD::VECTOR_INTERLEAVE:
+    return LowerVECTOR_INTERLEAVE(Op, DAG);
   case ISD::STRICT_LROUND:
   case ISD::STRICT_LLROUND:
   case ISD::STRICT_LRINT:
@@ -23478,6 +23488,33 @@
   }
 }
 
+SDValue
+AArch64TargetLowering::LowerVECTOR_DEINTERLEAVE(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  EVT OpVT = Op.getValueType();
+  assert(OpVT.isScalableVector() &&
+         "Unexpected fixed length vector in LowerVECTOR_DEINTERLEAVE.");
+  // The deinterleave index (even = 0, odd = 1) is the last operand.
+  unsigned Stride = Op.getNumOperands() - 1;
+  unsigned Idx = Op.getConstantOperandVal(Stride);
+  unsigned Opcode = (Idx == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
+  return DAG.getNode(Opcode, DL, OpVT, Op.getOperand(0), Op.getOperand(1));
+}
+
+SDValue AArch64TargetLowering::LowerVECTOR_INTERLEAVE(SDValue Op,
+                                                      SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  EVT OpVT = Op.getValueType();
+  assert(OpVT.isScalableVector() &&
+         "Unexpected fixed length vector in LowerVECTOR_INTERLEAVE.");
+
+  SDValue Lo = DAG.getNode(AArch64ISD::ZIP1, DL, OpVT, Op.getOperand(0),
+                           Op.getOperand(1));
+  SDValue Hi = DAG.getNode(AArch64ISD::ZIP2, DL, OpVT, Op.getOperand(0),
+                           Op.getOperand(1));
+  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(OpVT, OpVT), Lo, Hi);
+}
+
 SDValue
 AArch64TargetLowering::LowerFixedLengthFPToIntToSVE(SDValue Op,
                                                     SelectionDAG &DAG) const {
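
As a usage sketch (hypothetical function name; the intrinsic signatures match
the declarations used in the tests below), a frontend can split interleaved
complex data into its real and imaginary parts, which the lowering above
selects to uzp1/uzp2 for scalable vectors:

  define { <vscale x 4 x float>, <vscale x 4 x float> } @split_complex(<vscale x 8 x float> %vec) {
    ; Even lanes hold the real parts, odd lanes the imaginary parts.
    %re = call <vscale x 4 x float> @llvm.experimental.vector.deinterleave.even.nxv4f32(<vscale x 8 x float> %vec)
    %im = call <vscale x 4 x float> @llvm.experimental.vector.deinterleave.odd.nxv4f32(<vscale x 8 x float> %vec)
    %r0 = insertvalue { <vscale x 4 x float>, <vscale x 4 x float> } poison, <vscale x 4 x float> %re, 0
    %r1 = insertvalue { <vscale x 4 x float>, <vscale x 4 x float> } %r0, <vscale x 4 x float> %im, 1
    ret { <vscale x 4 x float>, <vscale x 4 x float> } %r1
  }

  declare <vscale x 4 x float> @llvm.experimental.vector.deinterleave.even.nxv4f32(<vscale x 8 x float>)
  declare <vscale x 4 x float> @llvm.experimental.vector.deinterleave.odd.nxv4f32(<vscale x 8 x float>)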
diff --git a/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll b/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll
@@ -0,0 +1,219 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+sve2 | FileCheck %s
+
+define <2 x half> @extract_elements_0_v2f16_v4f16(<4 x half> %vec) {
+; CHECK-LABEL: extract_elements_0_v2f16_v4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    dup v1.4h, v0.h[2]
+; CHECK-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+%retval = call <2 x half> @llvm.experimental.vector.deinterleave.even.v2f16(<4 x half> %vec)
+ret <2 x half> %retval
+}
+
+define <2 x half> @extract_elements_1_v2f16_v4f16(<4 x half> %vec) {
+; CHECK-LABEL: extract_elements_1_v2f16_v4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    dup v1.4h, v0.h[3]
+; CHECK-NEXT:    mov v1.h[0], v0.h[1]
+; CHECK-NEXT:    fmov d0, d1
+; CHECK-NEXT:    ret
+%retval = call <2 x half> @llvm.experimental.vector.deinterleave.odd.v2f16(<4 x half> %vec)
+ret <2 x half> %retval
+}
+
+define <4 x half> @extract_elements_0_v4f16_v8f16(<8 x half> %vec) {
+; CHECK-LABEL: extract_elements_0_v4f16_v8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    ret
+%retval = call <4 x half> @llvm.experimental.vector.deinterleave.even.v4f16(<8 x half> %vec)
+ret <4 x half> %retval
+}
+
+define <4 x half> @extract_elements_1_v4f16_v8f16(<8 x half> %vec) {
+; CHECK-LABEL: extract_elements_1_v4f16_v8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    uzp2 v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    ret
+%retval = call <4 x half> @llvm.experimental.vector.deinterleave.odd.v4f16(<8 x half> %vec)
+ret <4 x half> %retval
+}
+
+define <2 x float> @extract_elements_0_v2f32_v4f32(<4 x float> %vec) {
+; CHECK-LABEL: extract_elements_0_v2f32_v4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    zip1 v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    ret
+%retval = call <2 x float> @llvm.experimental.vector.deinterleave.even.v2f32(<4 x float> %vec)
+ret <2 x float> %retval
+}
+
+define <2 x float> @extract_elements_1_v2f32_v4f32(<4 x float> %vec) {
+; CHECK-LABEL: extract_elements_1_v2f32_v4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    zip2 v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    ret
+%retval = call <2 x float> @llvm.experimental.vector.deinterleave.odd.v2f32(<4 x float> %vec)
+ret <2 x float> %retval
+}
+
+define <8 x half> @extract_elements_0_v8f16_v16f16(<16 x half> %vec) {
+; CHECK-LABEL: extract_elements_0_v8f16_v16f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+%retval = call <8 x half> @llvm.experimental.vector.deinterleave.even.v8f16(<16 x half> %vec)
+ret <8 x half> %retval
+}
+
+define <8 x half> @extract_elements_1_v8f16_v16f16(<16 x half> %vec) {
+; CHECK-LABEL: extract_elements_1_v8f16_v16f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+%retval = call <8 x half> @llvm.experimental.vector.deinterleave.odd.v8f16(<16 x half> %vec)
+ret <8 x half> %retval
+}
+
+
+define <4 x float> @extract_elements_0_v4f32_v8f32(<8 x float> %vec) {
+; CHECK-LABEL: extract_elements_0_v4f32_v8f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+%retval = call <4 x float> @llvm.experimental.vector.deinterleave.even.v4f32(<8 x float> %vec)
+ret <4 x float> %retval
+}
+
+define <4 x float> @extract_elements_1_v4f32_v8f32(<8 x float> %vec) {
+; CHECK-LABEL: extract_elements_1_v4f32_v8f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+%retval = call <4 x float> @llvm.experimental.vector.deinterleave.odd.v4f32(<8 x float> %vec)
+ret <4 x float> %retval
+}
+
+define <2 x double> @extract_elements_0_v2f64_v4f64(<4 x double> %vec) {
+; CHECK-LABEL: extract_elements_0_v2f64_v4f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    ret
+%retval = call <2 x double> @llvm.experimental.vector.deinterleave.even.v2f64(<4 x double> %vec)
+ret <2 x double> %retval
+}
+
+define <2 x double> @extract_elements_1_v2f64_v4f64(<4 x double> %vec) {
+; CHECK-LABEL: extract_elements_1_v2f64_v4f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    ret
+%retval = call <2 x double> @llvm.experimental.vector.deinterleave.odd.v2f64(<4 x double> %vec)
+ret <2 x double> %retval
+}
+
+; Integers
+
+define <16 x i8> @extract_elements_0_v16i8_v32i8(<32 x i8> %vec) {
+; CHECK-LABEL: extract_elements_0_v16i8_v32i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    ret
+%retval = call <16 x i8> @llvm.experimental.vector.deinterleave.even.v16i8(<32 x i8> %vec)
+ret <16 x i8> %retval
+}
+
+define <16 x i8> @extract_elements_1_v16i8_v32i8(<32 x i8> %vec) {
+; CHECK-LABEL: extract_elements_1_v16i8_v32i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    ret
+%retval = call <16 x i8> @llvm.experimental.vector.deinterleave.odd.v16i8(<32 x i8> %vec)
+ret <16 x i8> %retval
+}
+
+define <8 x i16> @extract_elements_0_v8i16_v16i16(<16 x i16> %vec) {
+; CHECK-LABEL: extract_elements_0_v8i16_v16i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+%retval = call <8 x i16> @llvm.experimental.vector.deinterleave.even.v8i16(<16 x i16> %vec)
+ret <8 x i16> %retval
+}
+
+define <8 x i16> @extract_elements_1_v8i16_v16i16(<16 x i16> %vec) {
+; CHECK-LABEL: extract_elements_1_v8i16_v16i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+%retval = call <8 x i16> @llvm.experimental.vector.deinterleave.odd.v8i16(<16 x i16> %vec)
+ret <8 x i16> %retval
+}
+
+define <4 x i32> @extract_elements_0_v4i32_v8i32(<8 x i32> %vec) {
+; CHECK-LABEL: extract_elements_0_v4i32_v8i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+%retval = call <4 x i32> @llvm.experimental.vector.deinterleave.even.v4i32(<8 x i32> %vec)
+ret <4 x i32> %retval
+}
+
+define <4 x i32> @extract_elements_1_v4i32_v8i32(<8 x i32> %vec) {
+; CHECK-LABEL: extract_elements_1_v4i32_v8i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+%retval = call <4 x i32> @llvm.experimental.vector.deinterleave.odd.v4i32(<8 x i32> %vec)
+ret <4 x i32> %retval
+}
+
+define <2 x i64> @extract_elements_0_v2i64_v4i64(<4 x i64> %vec) {
+; CHECK-LABEL: extract_elements_0_v2i64_v4i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    ret
+%retval = call <2 x i64> @llvm.experimental.vector.deinterleave.even.v2i64(<4 x i64> %vec)
+ret <2 x i64> %retval
+}
+
+define <2 x i64> @extract_elements_1_v2i64_v4i64(<4 x i64> %vec) {
+; CHECK-LABEL: extract_elements_1_v2i64_v4i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    ret
+%retval = call <2 x i64> @llvm.experimental.vector.deinterleave.odd.v2i64(<4 x i64> %vec)
+ret <2 x i64> %retval
+}
+
+; Floating declarations
+declare <2 x half> @llvm.experimental.vector.deinterleave.even.v2f16(<4 x half>)
+declare <4 x half> @llvm.experimental.vector.deinterleave.even.v4f16(<8 x half>)
+declare <2 x float> @llvm.experimental.vector.deinterleave.even.v2f32(<4 x float>)
+declare <8 x half> @llvm.experimental.vector.deinterleave.even.v8f16(<16 x half>)
+declare <4 x float> @llvm.experimental.vector.deinterleave.even.v4f32(<8 x float>)
+declare <2 x double> @llvm.experimental.vector.deinterleave.even.v2f64(<4 x double>)
+declare <2 x half> @llvm.experimental.vector.deinterleave.odd.v2f16(<4 x half>)
+declare <4 x half> @llvm.experimental.vector.deinterleave.odd.v4f16(<8 x half>)
+declare <2 x float> @llvm.experimental.vector.deinterleave.odd.v2f32(<4 x float>)
+declare <8 x half> @llvm.experimental.vector.deinterleave.odd.v8f16(<16 x half>)
+declare <4 x float> @llvm.experimental.vector.deinterleave.odd.v4f32(<8 x float>)
+declare <2 x double> @llvm.experimental.vector.deinterleave.odd.v2f64(<4 x double>)
+
+; Integer declarations
+declare <16 x i8> @llvm.experimental.vector.deinterleave.even.v16i8(<32 x i8>)
+declare <8 x i16> @llvm.experimental.vector.deinterleave.even.v8i16(<16 x i16>)
+declare <4 x i32> @llvm.experimental.vector.deinterleave.even.v4i32(<8 x i32>)
+declare <2 x i64> @llvm.experimental.vector.deinterleave.even.v2i64(<4 x i64>)
+declare <16 x i8> @llvm.experimental.vector.deinterleave.odd.v16i8(<32 x i8>)
+declare <8 x i16> @llvm.experimental.vector.deinterleave.odd.v8i16(<16 x i16>)
+declare <4 x i32> @llvm.experimental.vector.deinterleave.odd.v4i32(<8 x i32>)
+declare <2 x i64> @llvm.experimental.vector.deinterleave.odd.v2i64(<4 x i64>)
diff --git a/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll b/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
@@ -0,0 +1,131 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+sve | FileCheck %s
+
+define <4 x half> @insert_elements_s2_v2f16(<2 x half> %vec0, <2 x half> %vec1) {
+; CHECK-LABEL: insert_elements_s2_v2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    ret
+  %retval = call <4 x half> @llvm.experimental.vector.interleave.v2f16(<2 x half> %vec0, <2 x half> %vec1)
+  ret <4 x half> %retval
+}
+
+define <8 x half> @insert_elements_s2_v4f16(<4 x half> %vec0, <4 x half> %vec1) {
+; CHECK-LABEL: insert_elements_s2_v4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI1_0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI1_0]
+; CHECK-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
+; CHECK-NEXT:    ret
+  %retval = call <8 x half> @llvm.experimental.vector.interleave.v4f16(<4 x half> %vec0, <4 x half> %vec1)
+  ret <8 x half> %retval
+}
+
+define <16 x half> @insert_elements_s2_v8f16(<8 x half> %vec0, <8 x half> %vec1) {
+; CHECK-LABEL: insert_elements_s2_v8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 v2.8h, v0.8h, v1.8h
+; CHECK-NEXT:    zip2 v1.8h, v0.8h, v1.8h
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    ret
+  %retval = call <16 x half> @llvm.experimental.vector.interleave.v8f16(<8 x half> %vec0, <8 x half> %vec1)
+  ret <16 x half> %retval
+}
+
+define <4 x float> @insert_elements_s2_v2f32(<2 x float> %vec0, <2 x float> %vec1) {
+; CHECK-LABEL: insert_elements_s2_v2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    rev64 v1.4s, v0.4s
+; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+  %retval = call <4 x float> @llvm.experimental.vector.interleave.v2f32(<2 x float> %vec0, <2 x float> %vec1)
+  ret <4 x float> %retval
+}
+
+define <8 x float> @insert_elements_s2_v4f32(<4 x float> %vec0, <4 x float> %vec1) {
+; CHECK-LABEL: insert_elements_s2_v4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 v2.4s, v0.4s, v1.4s
+; CHECK-NEXT:    zip2 v1.4s, v0.4s, v1.4s
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    ret
+  %retval = call <8 x float> @llvm.experimental.vector.interleave.v4f32(<4 x float> %vec0, <4 x float> %vec1)
+  ret <8 x float> %retval
+}
+
+define <4 x double> @insert_elements_s2_v2f64(<2 x double> %vec0, <2 x double> %vec1) {
+; CHECK-LABEL: insert_elements_s2_v2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 v2.2d, v0.2d, v1.2d
+; CHECK-NEXT:    zip2 v1.2d, v0.2d, v1.2d
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    ret
+  %retval = call <4 x double> @llvm.experimental.vector.interleave.v2f64(<2 x double> %vec0, <2 x double> %vec1)
+  ret <4 x double> %retval
+}
+
+; Integers
+
+define <32 x i8> @insert_elements_s2_v16i8(<16 x i8> %vec0, <16 x i8> %vec1) {
+; CHECK-LABEL: insert_elements_s2_v16i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 v2.16b, v0.16b, v1.16b
+; CHECK-NEXT:    zip2 v1.16b, v0.16b, v1.16b
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    ret
+  %retval = call <32 x i8> @llvm.experimental.vector.interleave.v16i8(<16 x i8> %vec0, <16 x i8> %vec1)
+  ret <32 x i8> %retval
+}
+
+define <16 x i16> @insert_elements_s2_v8i16(<8 x i16> %vec0, <8 x i16> %vec1) {
+; CHECK-LABEL: insert_elements_s2_v8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 v2.8h, v0.8h, v1.8h
+; CHECK-NEXT:    zip2 v1.8h, v0.8h, v1.8h
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    ret
+  %retval = call <16 x i16> @llvm.experimental.vector.interleave.v8i16(<8 x i16> %vec0, <8 x i16> %vec1)
+  ret <16 x i16> %retval
+}
+
+define <8 x i32> @insert_elements_s2_v4i32(<4 x i32> %vec0, <4 x i32> %vec1) {
+; CHECK-LABEL: insert_elements_s2_v4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 v2.4s, v0.4s, v1.4s
+; CHECK-NEXT:    zip2 v1.4s, v0.4s, v1.4s
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    ret
+  %retval = call <8 x i32> @llvm.experimental.vector.interleave.v4i32(<4 x i32> %vec0, <4 x i32> %vec1)
+  ret <8 x i32> %retval
+}
+
+define <4 x i64> @insert_elements_s2_v2i64(<2 x i64> %vec0, <2 x i64> %vec1) {
+; CHECK-LABEL: insert_elements_s2_v2i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 v2.2d, v0.2d, v1.2d
+; CHECK-NEXT:    zip2 v1.2d, v0.2d, v1.2d
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    ret
+  %retval = call <4 x i64> @llvm.experimental.vector.interleave.v2i64(<2 x i64> %vec0, <2 x i64> %vec1)
+  ret <4 x i64> %retval
+}
+
+; Float declarations
+declare <4 x half> @llvm.experimental.vector.interleave.v2f16(<2 x half>, <2 x half>)
+declare <8 x half> @llvm.experimental.vector.interleave.v4f16(<4 x half>, <4 x half>)
+declare <16 x half> @llvm.experimental.vector.interleave.v8f16(<8 x half>, <8 x half>)
+declare <4 x float> @llvm.experimental.vector.interleave.v2f32(<2 x float>, <2 x float>)
+declare <8 x float> @llvm.experimental.vector.interleave.v4f32(<4 x float>, <4 x float>)
+declare <4 x double> @llvm.experimental.vector.interleave.v2f64(<2 x double>, <2 x double>)
+
+; Integer declarations
+declare <32 x i8> @llvm.experimental.vector.interleave.v16i8(<16 x i8>, <16 x i8>)
+declare <16 x i16> @llvm.experimental.vector.interleave.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i32> @llvm.experimental.vector.interleave.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i64> @llvm.experimental.vector.interleave.v2i64(<2 x i64>, <2 x i64>)
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll b/llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll
@@ -0,0 +1,316 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+sve2 | FileCheck %s
+
+define <vscale x 2 x half> @extract_elements_0_nxv2f16_nxv4f16(<vscale x 4 x half> %vec) {
+; CHECK-LABEL: extract_elements_0_nxv2f16_nxv4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z1.d, z0.s
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    uzp1 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+%retval = call <vscale x 2 x half> @llvm.experimental.vector.deinterleave.even.nxv2f16(<vscale x 4 x half> %vec)
+ret <vscale x 2 x half> %retval
+}
+
+define <vscale x 2 x half> @extract_elements_1_nxv2f16_nxv4f16(<vscale x 4 x half> %vec) {
+; CHECK-LABEL: extract_elements_1_nxv2f16_nxv4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z1.d, z0.s
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    uzp2 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+%retval = call <vscale x 2 x half> @llvm.experimental.vector.deinterleave.odd.nxv2f16(<vscale x 4 x half> %vec)
+ret <vscale x 2 x half> %retval
+}
+
+define <vscale x 4 x half> @extract_elements_0_nxv4f16_nxv8f16(<vscale x 8 x half> %vec) {
+; CHECK-LABEL: extract_elements_0_nxv4f16_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z1.s, z0.h
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+%retval = call <vscale x 4 x half> @llvm.experimental.vector.deinterleave.even.nxv4f16(<vscale x 8 x half> %vec)
+ret <vscale x 4 x half> %retval
+}
+
+define <vscale x 4 x half> @extract_elements_1_nxv4f16_nxv8f16(<vscale x 8 x half> %vec) {
+; CHECK-LABEL: extract_elements_1_nxv4f16_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z1.s, z0.h
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    uzp2 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+%retval = call <vscale x 4 x half> @llvm.experimental.vector.deinterleave.odd.nxv4f16(<vscale x 8 x half> %vec)
+ret <vscale x 4 x half> %retval
+}
+
+define <vscale x 2 x float> @extract_elements_0_nxv2f32_nxv4f32(<vscale x 4 x float> %vec) {
+; CHECK-LABEL: extract_elements_0_nxv2f32_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z1.d, z0.s
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    uzp1 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+%retval = call <vscale x 2 x float> @llvm.experimental.vector.deinterleave.even.nxv2f32(<vscale x 4 x float> %vec)
+ret <vscale x 2 x float> %retval
+}
+
+define <vscale x 2 x float> @extract_elements_1_nxv2f32_nxv4f32(<vscale x 4 x float> %vec) {
+; CHECK-LABEL: extract_elements_1_nxv2f32_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z1.d, z0.s
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    uzp2 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+%retval = call <vscale x 2 x float> @llvm.experimental.vector.deinterleave.odd.nxv2f32(<vscale x 4 x float> %vec)
+ret <vscale x 2 x float> %retval
+}
+
+define <vscale x 8 x half> @extract_elements_0_nxv8f16_nxv16f16(<vscale x 16 x half> %vec) {
+; CHECK-LABEL: extract_elements_0_nxv8f16_nxv16f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
+%retval = call <vscale x 8 x half> @llvm.experimental.vector.deinterleave.even.nxv8f16(<vscale x 16 x half> %vec)
+ret <vscale x 8 x half> %retval
+}
+
+define <vscale x 8 x half> @extract_elements_1_nxv8f16_nxv16f16(<vscale x 16 x half> %vec) {
+; CHECK-LABEL: extract_elements_1_nxv8f16_nxv16f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
+%retval = call <vscale x 8 x half> @llvm.experimental.vector.deinterleave.odd.nxv8f16(<vscale x 16 x half> %vec)
+ret <vscale x 8 x half> %retval
+}
+
+
+define <vscale x 4 x float> @extract_elements_0_nxv4f32_nxv8f32(<vscale x 8 x float> %vec) {
+; CHECK-LABEL: extract_elements_0_nxv4f32_nxv8f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+%retval = call <vscale x 4 x float> @llvm.experimental.vector.deinterleave.even.nxv4f32(<vscale x 8 x float> %vec)
+ret <vscale x 4 x float> %retval
+}
+
+define <vscale x 4 x float> @extract_elements_1_nxv4f32_nxv8f32(<vscale x 8 x float> %vec) {
+; CHECK-LABEL: extract_elements_1_nxv4f32_nxv8f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+%retval = call <vscale x 4 x float> @llvm.experimental.vector.deinterleave.odd.nxv4f32(<vscale x 8 x float> %vec)
+ret <vscale x 4 x float> %retval
+}
+
+define <vscale x 2 x double> @extract_elements_0_nxv2f64_nxv4f64(<vscale x 4 x double> %vec) {
+; CHECK-LABEL: extract_elements_0_nxv2f64_nxv4f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+%retval = call <vscale x 2 x double> @llvm.experimental.vector.deinterleave.even.nxv2f64(<vscale x 4 x double> %vec)
+ret <vscale x 2 x double> %retval
+}
+
+define <vscale x 2 x double> @extract_elements_1_nxv2f64_nxv4f64(<vscale x 4 x double> %vec) {
+; CHECK-LABEL: extract_elements_1_nxv2f64_nxv4f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+%retval = call <vscale x 2 x double> @llvm.experimental.vector.deinterleave.odd.nxv2f64(<vscale x 4 x double> %vec)
+ret <vscale x 2 x double> %retval
+}
+
+; Integers
+
+define <vscale x 16 x i8> @extract_elements_0_nxv16i8_nxv32i8(<vscale x 32 x i8> %vec) {
+; CHECK-LABEL: extract_elements_0_nxv16i8_nxv32i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
+%retval = call <vscale x 16 x i8> @llvm.experimental.vector.deinterleave.even.nxv16i8(<vscale x 32 x i8> %vec)
+ret <vscale x 16 x i8> %retval
+}
+
+define <vscale x 16 x i8> @extract_elements_1_nxv16i8_nxv32i8(<vscale x 32 x i8> %vec) {
+; CHECK-LABEL: extract_elements_1_nxv16i8_nxv32i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
+%retval = call <vscale x 16 x i8> @llvm.experimental.vector.deinterleave.odd.nxv16i8(<vscale x 32 x i8> %vec)
+ret <vscale x 16 x i8> %retval
+}
+
+define <vscale x 8 x i16> @extract_elements_0_nxv8i16_nxv16i16(<vscale x 16 x i16> %vec) {
+; CHECK-LABEL: extract_elements_0_nxv8i16_nxv16i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
+%retval = call <vscale x 8 x i16> @llvm.experimental.vector.deinterleave.even.nxv8i16(<vscale x 16 x i16> %vec)
+ret <vscale x 8 x i16> %retval
+}
+
+define <vscale x 8 x i16> @extract_elements_1_nxv8i16_nxv16i16(<vscale x 16 x i16> %vec) {
+; CHECK-LABEL: extract_elements_1_nxv8i16_nxv16i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
+%retval = call <vscale x 8 x i16> @llvm.experimental.vector.deinterleave.odd.nxv8i16(<vscale x 16 x i16> %vec)
+ret <vscale x 8 x i16> %retval
+}
+
+define <vscale x 4 x i32> @extract_elements_0_nxv4i32_nxv8i32(<vscale x 8 x i32> %vec) {
+; CHECK-LABEL: extract_elements_0_nxv4i32_nxv8i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+%retval = call <vscale x 4 x i32> @llvm.experimental.vector.deinterleave.even.nxv4i32(<vscale x 8 x i32> %vec)
+ret <vscale x 4 x i32> %retval
+}
+
+define <vscale x 4 x i32> @extract_elements_1_nxv4i32_nxv8i32(<vscale x 8 x i32> %vec) {
+; CHECK-LABEL: extract_elements_1_nxv4i32_nxv8i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+%retval = call <vscale x 4 x i32> @llvm.experimental.vector.deinterleave.odd.nxv4i32(<vscale x 8 x i32> %vec)
+ret <vscale x 4 x i32> %retval
+}
+
+define <vscale x 2 x i64> @extract_elements_0_nxv2i64_nxv4i64(<vscale x 4 x i64> %vec) {
+; CHECK-LABEL: extract_elements_0_nxv2i64_nxv4i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+%retval = call <vscale x 2 x i64> @llvm.experimental.vector.deinterleave.even.nxv2i64(<vscale x 4 x i64> %vec)
+ret <vscale x 2 x i64> %retval
+}
+
+define <vscale x 2 x i64> @extract_elements_1_nxv2i64_nxv4i64(<vscale x 4 x i64> %vec) {
+; CHECK-LABEL: extract_elements_1_nxv2i64_nxv4i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+%retval = call <vscale x 2 x i64> @llvm.experimental.vector.deinterleave.odd.nxv2i64(<vscale x 4 x i64> %vec)
+ret <vscale x 2 x i64> %retval
+}
+
+; Predicated
+define <vscale x 16 x i1> @extract_elements_0_nxv16i1_nxv32i1(<vscale x 32 x i1> %vec) {
+; CHECK-LABEL: extract_elements_0_nxv16i1_nxv32i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 p0.b, p0.b, p1.b
+; CHECK-NEXT:    ret
+%retval = call <vscale x 16 x i1> @llvm.experimental.vector.deinterleave.even.nxv16i1(<vscale x 32 x i1> %vec)
+ret <vscale x 16 x i1> %retval
+}
+
+define <vscale x 16 x i1> @extract_elements_1_nxv16i1_nxv32i1(<vscale x 32 x i1> %vec) {
+; CHECK-LABEL: extract_elements_1_nxv16i1_nxv32i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp2 p0.b, p0.b, p1.b
+; CHECK-NEXT:    ret
+%retval = call <vscale x 16 x i1> @llvm.experimental.vector.deinterleave.odd.nxv16i1(<vscale x 32 x i1> %vec)
+ret <vscale x 16 x i1> %retval
+}
+
+define <vscale x 8 x i1> @extract_elements_0_nxv8i1_nxv16i1(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_elements_0_nxv8i1_nxv16i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p1.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    uzp1 p0.h, p0.h, p1.h
+; CHECK-NEXT:    ret
+%retval = call <vscale x 8 x i1> @llvm.experimental.vector.deinterleave.even.nxv8i1(<vscale x 16 x i1> %vec)
+ret <vscale x 8 x i1> %retval
+}
+
+define <vscale x 8 x i1> @extract_elements_1_nxv8i1_nxv16i1(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_elements_1_nxv8i1_nxv16i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p1.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    uzp2 p0.h, p0.h, p1.h
+; CHECK-NEXT:    ret
+%retval = call <vscale x 8 x i1> @llvm.experimental.vector.deinterleave.odd.nxv8i1(<vscale x 16 x i1> %vec)
+ret <vscale x 8 x i1> %retval
+}
+
+define <vscale x 4 x i1> @extract_elements_0_nxv4i1_nxv8i1(<vscale x 8 x i1> %vec) {
+; CHECK-LABEL: extract_elements_0_nxv4i1_nxv8i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p1.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    uzp1 p0.s, p0.s, p1.s
+; CHECK-NEXT:    ret
+%retval = call <vscale x 4 x i1> @llvm.experimental.vector.deinterleave.even.nxv4i1(<vscale x 8 x i1> %vec)
+ret <vscale x 4 x i1> %retval
+}
+
+define <vscale x 4 x i1> @extract_elements_1_nxv4i1_nxv8i1(<vscale x 8 x i1> %vec) {
+; CHECK-LABEL: extract_elements_1_nxv4i1_nxv8i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p1.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    uzp2 p0.s, p0.s, p1.s
+; CHECK-NEXT:    ret
+%retval = call <vscale x 4 x i1> @llvm.experimental.vector.deinterleave.odd.nxv4i1(<vscale x 8 x i1> %vec)
+ret <vscale x 4 x i1> %retval
+}
+
+define <vscale x 2 x i1> @extract_elements_0_nxv2i1_nxv4i1(<vscale x 4 x i1> %vec) {
+; CHECK-LABEL: extract_elements_0_nxv2i1_nxv4i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p1.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    uzp1 p0.d, p0.d, p1.d
+; CHECK-NEXT:    ret
+%retval = call <vscale x 2 x i1> @llvm.experimental.vector.deinterleave.even.nxv2i1(<vscale x 4 x i1> %vec)
+ret <vscale x 2 x i1> %retval
+}
+
+define <vscale x 2 x i1> @extract_elements_1_nxv2i1_nxv4i1(<vscale x 4 x i1> %vec) {
+; CHECK-LABEL: extract_elements_1_nxv2i1_nxv4i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p1.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    uzp2 p0.d, p0.d, p1.d
+; CHECK-NEXT:    ret
+%retval = call <vscale x 2 x i1> @llvm.experimental.vector.deinterleave.odd.nxv2i1(<vscale x 4 x i1> %vec)
+ret <vscale x 2 x i1> %retval
+}
+
+; Floating declarations
+declare <vscale x 2 x half> @llvm.experimental.vector.deinterleave.even.nxv2f16(<vscale x 4 x half>)
+declare <vscale x 4 x half> @llvm.experimental.vector.deinterleave.even.nxv4f16(<vscale x 8 x half>)
+declare <vscale x 2 x float> @llvm.experimental.vector.deinterleave.even.nxv2f32(<vscale x 4 x float>)
+declare <vscale x 8 x half> @llvm.experimental.vector.deinterleave.even.nxv8f16(<vscale x 16 x half>)
+declare <vscale x 4 x float> @llvm.experimental.vector.deinterleave.even.nxv4f32(<vscale x 8 x float>)
+declare <vscale x 2 x double> @llvm.experimental.vector.deinterleave.even.nxv2f64(<vscale x 4 x double>)
+declare <vscale x 2 x half> @llvm.experimental.vector.deinterleave.odd.nxv2f16(<vscale x 4 x half>)
+declare <vscale x 4 x half> @llvm.experimental.vector.deinterleave.odd.nxv4f16(<vscale x 8 x half>)
+declare <vscale x 2 x float> @llvm.experimental.vector.deinterleave.odd.nxv2f32(<vscale x 4 x float>)
+declare <vscale x 8 x half> @llvm.experimental.vector.deinterleave.odd.nxv8f16(<vscale x 16 x half>)
+declare <vscale x 4 x float> @llvm.experimental.vector.deinterleave.odd.nxv4f32(<vscale x 8 x float>)
+declare <vscale x 2 x double> @llvm.experimental.vector.deinterleave.odd.nxv2f64(<vscale x 4 x double>)
+
+; Integer declarations
+declare <vscale x 16 x i8> @llvm.experimental.vector.deinterleave.even.nxv16i8(<vscale x 32 x i8>)
+declare <vscale x 8 x i16> @llvm.experimental.vector.deinterleave.even.nxv8i16(<vscale x 16 x i16>)
+declare <vscale x 4 x i32> @llvm.experimental.vector.deinterleave.even.nxv4i32(<vscale x 8 x i32>)
+declare <vscale x 2 x i64> @llvm.experimental.vector.deinterleave.even.nxv2i64(<vscale x 4 x i64>)
+declare <vscale x 16 x i8> @llvm.experimental.vector.deinterleave.odd.nxv16i8(<vscale x 32 x i8>)
+declare <vscale x 8 x i16> @llvm.experimental.vector.deinterleave.odd.nxv8i16(<vscale x 16 x i16>)
+declare <vscale x 4 x i32> @llvm.experimental.vector.deinterleave.odd.nxv4i32(<vscale x 8 x i32>)
+declare <vscale x 2 x i64> @llvm.experimental.vector.deinterleave.odd.nxv2i64(<vscale x 4 x i64>)
+
+; Predicated
+declare <vscale x 16 x i1> @llvm.experimental.vector.deinterleave.even.nxv16i1(<vscale x 32 x i1>)
+declare <vscale x 8 x i1> @llvm.experimental.vector.deinterleave.even.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i1> @llvm.experimental.vector.deinterleave.even.nxv4i1(<vscale x 8 x i1>)
+declare <vscale x 2 x i1> @llvm.experimental.vector.deinterleave.even.nxv2i1(<vscale x 4 x i1>)
+declare <vscale x 16 x i1> @llvm.experimental.vector.deinterleave.odd.nxv16i1(<vscale x 32 x i1>)
+declare <vscale x 8 x i1> @llvm.experimental.vector.deinterleave.odd.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i1> @llvm.experimental.vector.deinterleave.odd.nxv4i1(<vscale x 8 x i1>)
+declare <vscale x 2 x i1> @llvm.experimental.vector.deinterleave.odd.nxv2i1(<vscale x 4 x i1>)
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll b/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll
@@ -0,0 +1,181 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+sve | FileCheck %s
+
+define <vscale x 4 x half> @insert_elements_s2_nxv2f16(<vscale x 2 x half> %vec0, <vscale x 2 x half> %vec1) {
+; CHECK-LABEL: insert_elements_s2_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 z2.d, z0.d, z1.d
+; CHECK-NEXT:    zip1 z0.d, z0.d, z1.d
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z2.s
+; CHECK-NEXT:    ret
+  %retval = call <vscale x 4 x half> @llvm.experimental.vector.interleave.nxv2f16(<vscale x 2 x half> %vec0, <vscale x 2 x half> %vec1)
+  ret <vscale x 4 x half> %retval
+}
+
+define <vscale x 8 x half> @insert_elements_s2_nxv4f16(<vscale x 4 x half> %vec0, <vscale x 4 x half> %vec1) {
+; CHECK-LABEL: insert_elements_s2_nxv4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 z2.s, z0.s, z1.s
+; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z2.h
+; CHECK-NEXT:    ret
+  %retval = call <vscale x 8 x half> @llvm.experimental.vector.interleave.nxv4f16(<vscale x 4 x half> %vec0, <vscale x 4 x half> %vec1)
+  ret <vscale x 8 x half> %retval
+}
+
+define <vscale x 16 x half> @insert_elements_s2_nxv8f16(<vscale x 8 x half> %vec0, <vscale x 8 x half> %vec1) {
+; CHECK-LABEL: insert_elements_s2_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z2.h, z0.h, z1.h
+; CHECK-NEXT:    zip2 z1.h, z0.h, z1.h
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+  %retval = call <vscale x 16 x half> @llvm.experimental.vector.interleave.nxv8f16(<vscale x 8 x half> %vec0, <vscale x 8 x half> %vec1)
+  ret <vscale x 16 x half> %retval
+}
+
+define <vscale x 4 x float> @insert_elements_s2_nxv2f32(<vscale x 2 x float> %vec0, <vscale x 2 x float> %vec1) {
+; CHECK-LABEL: insert_elements_s2_nxv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 z2.d, z0.d, z1.d
+; CHECK-NEXT:    zip1 z0.d, z0.d, z1.d
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z2.s
+; CHECK-NEXT:    ret
+  %retval = call <vscale x 4 x float> @llvm.experimental.vector.interleave.nxv2f32(<vscale x 2 x float> %vec0, <vscale x 2 x float> %vec1)
+  ret <vscale x 4 x float> %retval
+}
+
+define <vscale x 8 x float> @insert_elements_s2_nxv4f32(<vscale x 4 x float> %vec0, <vscale x 4 x float> %vec1) {
+; CHECK-LABEL: insert_elements_s2_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z2.s, z0.s, z1.s
+; CHECK-NEXT:    zip2 z1.s, z0.s, z1.s
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+  %retval = call <vscale x 8 x float> @llvm.experimental.vector.interleave.nxv4f32(<vscale x 4 x float> %vec0, <vscale x 4 x float> %vec1)
+  ret <vscale x 8 x float> %retval
+}
+
+define <vscale x 4 x double> @insert_elements_s2_nxv2f64(<vscale x 2 x double> %vec0, <vscale x 2 x double> %vec1) {
+; CHECK-LABEL: insert_elements_s2_nxv2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z2.d, z0.d, z1.d
+; CHECK-NEXT:    zip2 z1.d, z0.d, z1.d
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+  %retval = call <vscale x 4 x double> @llvm.experimental.vector.interleave.nxv2f64(<vscale x 2 x double> %vec0, <vscale x 2 x double> %vec1)
+  ret <vscale x 4 x double> %retval
+}
+
+; Integers
+
+define <vscale x 32 x i8> @insert_elements_s2_nxv16i8(<vscale x 16 x i8> %vec0, <vscale x 16 x i8> %vec1) {
+; CHECK-LABEL: insert_elements_s2_nxv16i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z2.b, z0.b, z1.b
+; CHECK-NEXT:    zip2 z1.b, z0.b, z1.b
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+  %retval = call <vscale x 32 x i8> @llvm.experimental.vector.interleave.nxv16i8(<vscale x 16 x i8> %vec0, <vscale x 16 x i8> %vec1)
+  ret <vscale x 32 x i8> %retval
+}
+
+define <vscale x 16 x i16> @insert_elements_s2_nxv8i16(<vscale x 8 x i16> %vec0, <vscale x 8 x i16> %vec1) {
+; CHECK-LABEL: insert_elements_s2_nxv8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z2.h, z0.h, z1.h
+; CHECK-NEXT:    zip2 z1.h, z0.h, z1.h
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+  %retval = call <vscale x 16 x i16> @llvm.experimental.vector.interleave.nxv8i16(<vscale x 8 x i16> %vec0, <vscale x 8 x i16> %vec1)
+  ret <vscale x 16 x i16> %retval
+}
+
+define <vscale x 8 x i32> @insert_elements_s2_nxv4i32(<vscale x 4 x i32> %vec0, <vscale x 4 x i32> %vec1) {
+; CHECK-LABEL: insert_elements_s2_nxv4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z2.s, z0.s, z1.s
+; CHECK-NEXT:    zip2 z1.s, z0.s, z1.s
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+  %retval = call <vscale x 8 x i32> @llvm.experimental.vector.interleave.nxv4i32(<vscale x 4 x i32> %vec0, <vscale x 4 x i32> %vec1)
+  ret <vscale x 8 x i32> %retval
+}
+
+define <vscale x 4 x i64> @insert_elements_s2_nxv2i64(<vscale x 2 x i64> %vec0, <vscale x 2 x i64> %vec1) {
+; CHECK-LABEL: insert_elements_s2_nxv2i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 z2.d, z0.d, z1.d
+; CHECK-NEXT:    zip2 z1.d, z0.d, z1.d
+; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    ret
+  %retval = call <vscale x 4 x i64> @llvm.experimental.vector.interleave.nxv2i64(<vscale x 2 x i64> %vec0, <vscale x 2 x i64> %vec1)
+  ret <vscale x 4 x i64> %retval
+}
+
+; Predicated
+
+define <vscale x 32 x i1> @insert_elements_s2_nxv16i1(<vscale x 16 x i1> %vec0, <vscale x 16 x i1> %vec1) {
+; CHECK-LABEL: insert_elements_s2_nxv16i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 p2.b, p0.b, p1.b
+; CHECK-NEXT:    zip2 p1.b, p0.b, p1.b
+; CHECK-NEXT:    mov p0.b, p2.b
+; CHECK-NEXT:    ret
+  %retval = call <vscale x 32 x i1> @llvm.experimental.vector.interleave.nxv16i1(<vscale x 16 x i1> %vec0, <vscale x 16 x i1> %vec1)
+  ret <vscale x 32 x i1> %retval
+}
+
+define <vscale x 16 x i1> @insert_elements_s2_nxv8i1(<vscale x 8 x i1> %vec0, <vscale x 8 x i1> %vec1) {
+; CHECK-LABEL: insert_elements_s2_nxv8i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 p2.h, p0.h, p1.h
+; CHECK-NEXT:    zip1 p0.h, p0.h, p1.h
+; CHECK-NEXT:    uzp1 p0.b, p0.b, p2.b
+; CHECK-NEXT:    ret
+  %retval = call <vscale x 16 x i1> @llvm.experimental.vector.interleave.nxv8i1(<vscale x 8 x i1> %vec0, <vscale x 8 x i1> %vec1)
+  ret <vscale x 16 x i1> %retval
+}
+
+define <vscale x 8 x i1> @insert_elements_s2_nxv4i1(<vscale x 4 x i1> %vec0, <vscale x 4 x i1> %vec1) {
+; CHECK-LABEL: insert_elements_s2_nxv4i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 p2.s, p0.s, p1.s
+; CHECK-NEXT:    zip1 p0.s, p0.s, p1.s
+; CHECK-NEXT:    uzp1 p0.h, p0.h, p2.h
+; CHECK-NEXT:    ret
+  %retval = call <vscale x 8 x i1> @llvm.experimental.vector.interleave.nxv4i1(<vscale x 4 x i1> %vec0, <vscale x 4 x i1> %vec1)
+  ret <vscale x 8 x i1> %retval
+}
+
+define <vscale x 4 x i1> @insert_elements_s2_nxv2i1(<vscale x 2 x i1> %vec0, <vscale x 2 x i1> %vec1) {
+; CHECK-LABEL: insert_elements_s2_nxv2i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip2 p2.d, p0.d, p1.d
+; CHECK-NEXT:    zip1 p0.d, p0.d, p1.d
+; CHECK-NEXT:    uzp1 p0.s, p0.s, p2.s
+; CHECK-NEXT:    ret
+  %retval = call <vscale x 4 x i1> @llvm.experimental.vector.interleave.nxv2i1(<vscale x 2 x i1> %vec0, <vscale x 2 x i1> %vec1)
+  ret <vscale x 4 x i1> %retval
+}
+
+
+; Float declarations
+declare <vscale x 4 x half> @llvm.experimental.vector.interleave.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
+declare <vscale x 8 x half> @llvm.experimental.vector.interleave.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
+declare <vscale x 16 x half> @llvm.experimental.vector.interleave.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.experimental.vector.interleave.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
+declare <vscale x 8 x float> @llvm.experimental.vector.interleave.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 4 x double> @llvm.experimental.vector.interleave.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
+
+; Integer declarations
+declare <vscale x 32 x i8> @llvm.experimental.vector.interleave.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 16 x i16> @llvm.experimental.vector.interleave.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 8 x i32> @llvm.experimental.vector.interleave.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 4 x i64> @llvm.experimental.vector.interleave.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+
+; Predicated
+declare <vscale x 32 x i1> @llvm.experimental.vector.interleave.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.experimental.vector.interleave.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
+declare <vscale x 8 x i1> @llvm.experimental.vector.interleave.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
+declare <vscale x 4 x i1> @llvm.experimental.vector.interleave.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)