diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -610,6 +610,7 @@
   SDValue lowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerSTEP_VECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVECTOR_REVERSE(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerABS(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerMaskedLoad(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerMaskedStore(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -712,6 +712,9 @@
         setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
       }
 
+      // Splice
+      setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
+
       // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point
       // type that can represent the value exactly.
       if (VT.getVectorElementType() != MVT::i64) {
@@ -790,6 +793,7 @@
       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
 
       setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
+      setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
 
       for (unsigned VPOpc : FloatingPointVPOps)
         setOperationAction(VPOpc, VT, Custom);
@@ -3454,6 +3458,8 @@
     return lowerSTEP_VECTOR(Op, DAG);
   case ISD::VECTOR_REVERSE:
     return lowerVECTOR_REVERSE(Op, DAG);
+  case ISD::VECTOR_SPLICE:
+    return lowerVECTOR_SPLICE(Op, DAG);
   case ISD::BUILD_VECTOR:
     return lowerBUILD_VECTOR(Op, DAG, Subtarget);
   case ISD::SPLAT_VECTOR:
@@ -5537,6 +5543,53 @@
   return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices, Mask, VL);
 }
 
+// Lower ISD::VECTOR_SPLICE to a VSLIDEDOWN_VL/VSLIDEUP_VL pair: V1 is slid
+// down by DownOffset elements, then V2 is slid up by UpOffset elements on top
+// of that result. Operand 2 of Op is the signed constant splice offset.
+SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  SDValue V1 = Op.getOperand(0);
+  SDValue V2 = Op.getOperand(1);
+  MVT XLenVT = Subtarget.getXLenVT();
+  MVT VecVT = Op.getSimpleValueType();
+
+  // Runtime element count of VecVT: vscale * its minimum element count.
+  unsigned MinElts = VecVT.getVectorMinNumElements();
+  SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT,
+                              DAG.getConstant(MinElts, DL, XLenVT));
+
+  // One offset is the magnitude of the immediate and the other is VLMax minus
+  // that magnitude, so DownOffset + UpOffset == VLMax in both branches.
+  int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
+  SDValue DownOffset, UpOffset;
+  if (ImmValue >= 0) {
+    // The operand is a TargetConstant, we need to rebuild it as a regular
+    // constant.
+    DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
+    UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
+  } else {
+    // The operand is a TargetConstant, we need to rebuild it as a regular
+    // constant rather than negating the original operand.
+    UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
+    DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
+  }
+
+  MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
+  SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VLMax);
+
+  // UpOffset is the final operand of the slidedown: only that many elements
+  // of V1 are carried over into the result.
+  SDValue SlideDown =
+      DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VecVT, DAG.getUNDEF(VecVT), V1,
+                  DownOffset, TrueMask, UpOffset);
+  // The slideup takes SlideDown as its first operand and the VLMAX sentinel
+  // as its last, placing V2 from element UpOffset onwards.
+  return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VecVT, SlideDown, V2, UpOffset,
+                     TrueMask,
+                     DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT));
+}
+
 SDValue
 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
                                                      SelectionDAG &DAG) const {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll b/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll
@@ -0,0 +1,2034 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple riscv64 -mattr=+m,+f,+d,+v,+zfh < %s | FileCheck %s
+
+; Tests assume VLEN=128 or vscale_range_min=2.
+
+declare <vscale x 1 x i8> @llvm.experimental.vector.splice.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, i32)
+
+define <vscale x 1 x i8> @splice_nxv1i8_offset_zero(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b) #0 {
+; CHECK-LABEL: splice_nxv1i8_offset_zero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret
+  %res = call <vscale x 1 x i8> @llvm.experimental.vector.splice.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, i32 0)
+  ret <vscale x 1 x i8> %res
+}
+
+define <vscale x 1 x i8> @splice_nxv1i8_offset_negone(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b) #0 {
+; CHECK-LABEL: splice_nxv1i8_offset_negone:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; CHECK-NEXT:    vslidedown.vx v8, v8, a0
+; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, tu, mu
+; CHECK-NEXT:    vslideup.vi v8, v9, 1
+; CHECK-NEXT:    ret
+  %res = call <vscale x 1 x i8> @llvm.experimental.vector.splice.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, i32 -1)
+  ret <vscale x 1 x i8> %res
+}
+
+define <vscale x 1 x i8> @splice_nxv1i8_offset_min(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b) #0 {
+; CHECK-LABEL: splice_nxv1i8_offset_min:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    addi a0, a0, -2
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
+; CHECK-NEXT:    vslidedown.vx v8, v8, a0
+; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, tu, mu
+; CHECK-NEXT:    vslideup.vi v8, v9, 2
+; CHECK-NEXT:    ret
+  %res = call <vscale x 1 x i8> @llvm.experimental.vector.splice.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, i32 -2)
+  ret <vscale x 1 x i8> %res
+}
+
+define <vscale x 1 x i8> @splice_nxv1i8_offset_max(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b) #0 {
+; CHECK-LABEL: splice_nxv1i8_offset_max:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    addi a0, a0, -1
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vslidedown.vi v8, v8, 1
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, tu, mu
+; CHECK-NEXT:    vslideup.vx v8, v9, a0
+; CHECK-NEXT:    ret
+  %res = call <vscale x 1 x i8> @llvm.experimental.vector.splice.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, i32 1)
+  ret <vscale x 1 x i8> %res
+}
+ +define @splice_nxv2i8_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv2i8_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e8, mf4, tu, mu +; CHECK-NEXT: vslideup.vi v8, v9, 1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv2i8( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv2i8_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv2i8_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -4 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e8, mf4, tu, mu +; CHECK-NEXT: vslideup.vi v8, v9, 4 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv2i8( %a, %b, i32 -4) + ret %res +} + +define @splice_nxv2i8_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv2i8_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -3 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 3 +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, tu, mu +; CHECK-NEXT: vslideup.vx v8, v9, a0 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv2i8( %a, %b, i32 3) + ret %res +} + +declare @llvm.experimental.vector.splice.nxv4i8(, , i32) + +define @splice_nxv4i8_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv4i8_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv4i8( %a, %b, i32 0) + ret %res +} + +define @splice_nxv4i8_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv4i8_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, mu +; CHECK-NEXT: 
vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, tu, mu +; CHECK-NEXT: vslideup.vi v8, v9, 1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv4i8( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv4i8_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv4i8_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, tu, mu +; CHECK-NEXT: vslideup.vi v8, v9, 8 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv4i8( %a, %b, i32 -8) + ret %res +} + +define @splice_nxv4i8_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv4i8_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -7 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 7 +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, tu, mu +; CHECK-NEXT: vslideup.vx v8, v9, a0 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv4i8( %a, %b, i32 7) + ret %res +} + +declare @llvm.experimental.vector.splice.nxv8i8(, , i32) + +define @splice_nxv8i8_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv8i8_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv8i8( %a, %b, i32 0) + ret %res +} + +define @splice_nxv8i8_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv8i8_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e8, m1, tu, mu +; CHECK-NEXT: vslideup.vi v8, v9, 1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv8i8( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv8i8_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv8i8_offset_min: 
+; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a0, a0, -16 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e8, m1, tu, mu +; CHECK-NEXT: vslideup.vi v8, v9, 16 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv8i8( %a, %b, i32 -16) + ret %res +} + +define @splice_nxv8i8_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv8i8_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a0, a0, -15 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 15 +; CHECK-NEXT: vsetvli a1, zero, e8, m1, tu, mu +; CHECK-NEXT: vslideup.vx v8, v9, a0 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv8i8( %a, %b, i32 15) + ret %res +} + +declare @llvm.experimental.vector.splice.nxv16i8(, , i32) + +define @splice_nxv16i8_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv16i8_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv16i8( %a, %b, i32 0) + ret %res +} + +define @splice_nxv16i8_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv16i8_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e8, m2, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e8, m2, tu, mu +; CHECK-NEXT: vslideup.vi v8, v10, 1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv16i8( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv16i8_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv16i8_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -32 +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e8, m2, tu, mu +; CHECK-NEXT: vslideup.vx v8, v10, a1 +; 
CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv16i8( %a, %b, i32 -32) + ret %res +} + +define @splice_nxv16i8_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv16i8_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -31 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 31 +; CHECK-NEXT: vsetvli a1, zero, e8, m2, tu, mu +; CHECK-NEXT: vslideup.vx v8, v10, a0 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv16i8( %a, %b, i32 31) + ret %res +} + +declare @llvm.experimental.vector.splice.nxv32i8(, , i32) + +define @splice_nxv32i8_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv32i8_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv32i8( %a, %b, i32 0) + ret %res +} + +define @splice_nxv32i8_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv32i8_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e8, m4, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e8, m4, tu, mu +; CHECK-NEXT: vslideup.vi v8, v12, 1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv32i8( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv32i8_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv32i8_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -64 +; CHECK-NEXT: li a1, 64 +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e8, m4, tu, mu +; CHECK-NEXT: vslideup.vx v8, v12, a1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv32i8( %a, %b, i32 -64) + ret %res +} + +define @splice_nxv32i8_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv32i8_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: 
csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -63 +; CHECK-NEXT: li a1, 63 +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a1 +; CHECK-NEXT: vsetvli a1, zero, e8, m4, tu, mu +; CHECK-NEXT: vslideup.vx v8, v12, a0 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv32i8( %a, %b, i32 63) + ret %res +} + +declare @llvm.experimental.vector.splice.nxv64i8(, , i32) + +define @splice_nxv64i8_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv64i8_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv64i8( %a, %b, i32 0) + ret %res +} + +define @splice_nxv64i8_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv64i8_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e8, m8, tu, mu +; CHECK-NEXT: vslideup.vi v8, v16, 1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv64i8( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv64i8_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv64i8_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: addi a0, a0, -128 +; CHECK-NEXT: li a1, 128 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e8, m8, tu, mu +; CHECK-NEXT: vslideup.vx v8, v16, a1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv64i8( %a, %b, i32 -128) + ret %res +} + +define @splice_nxv64i8_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv64i8_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: addi a0, a0, -127 +; CHECK-NEXT: li a1, 127 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a1 +; CHECK-NEXT: 
vsetvli a1, zero, e8, m8, tu, mu +; CHECK-NEXT: vslideup.vx v8, v16, a0 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv64i8( %a, %b, i32 127) + ret %res +} + +declare @llvm.experimental.vector.splice.nxv1i16(, , i32) + +define @splice_nxv1i16_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv1i16_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv1i16( %a, %b, i32 0) + ret %res +} + +define @splice_nxv1i16_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv1i16_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, tu, mu +; CHECK-NEXT: vslideup.vi v8, v9, 1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv1i16( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv1i16_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv1i16_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: addi a0, a0, -2 +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, tu, mu +; CHECK-NEXT: vslideup.vi v8, v9, 2 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv1i16( %a, %b, i32 -2) + ret %res +} + +define @splice_nxv1i16_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv1i16_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, tu, mu +; CHECK-NEXT: vslideup.vx v8, v9, a0 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv1i16( %a, %b, i32 1) + ret %res +} + +declare @llvm.experimental.vector.splice.nxv2i16(, , i32) + +define 
@splice_nxv2i16_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv2i16_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv2i16( %a, %b, i32 0) + ret %res +} + +define @splice_nxv2i16_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv2i16_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, tu, mu +; CHECK-NEXT: vslideup.vi v8, v9, 1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv2i16( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv2i16_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv2i16_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -4 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, tu, mu +; CHECK-NEXT: vslideup.vi v8, v9, 4 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv2i16( %a, %b, i32 -4) + ret %res +} + +define @splice_nxv2i16_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv2i16_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -3 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 3 +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, tu, mu +; CHECK-NEXT: vslideup.vx v8, v9, a0 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv2i16( %a, %b, i32 3) + ret %res +} + +declare @llvm.experimental.vector.splice.nxv4i16(, , i32) + +define @splice_nxv4i16_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv4i16_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv4i16( %a, %b, i32 0) + ret %res +} + +define @splice_nxv4i16_offset_negone( %a, 
%b) #0 { +; CHECK-LABEL: splice_nxv4i16_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, tu, mu +; CHECK-NEXT: vslideup.vi v8, v9, 1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv4i16( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv4i16_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv4i16_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -8 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, tu, mu +; CHECK-NEXT: vslideup.vi v8, v9, 8 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv4i16( %a, %b, i32 -8) + ret %res +} + +define @splice_nxv4i16_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv4i16_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -7 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 7 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, tu, mu +; CHECK-NEXT: vslideup.vx v8, v9, a0 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv4i16( %a, %b, i32 7) + ret %res +} + +declare @llvm.experimental.vector.splice.nxv8i16(, , i32) + +define @splice_nxv8i16_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv8i16_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv8i16( %a, %b, i32 0) + ret %res +} + +define @splice_nxv8i16_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv8i16_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e16, m2, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, 
m2, tu, mu +; CHECK-NEXT: vslideup.vi v8, v10, 1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv8i16( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv8i16_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv8i16_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a0, a0, -16 +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m2, tu, mu +; CHECK-NEXT: vslideup.vi v8, v10, 16 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv8i16( %a, %b, i32 -16) + ret %res +} + +define @splice_nxv8i16_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv8i16_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a0, a0, -15 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 15 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, tu, mu +; CHECK-NEXT: vslideup.vx v8, v10, a0 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv8i16( %a, %b, i32 15) + ret %res +} + +declare @llvm.experimental.vector.splice.nxv16i16(, , i32) + +define @splice_nxv16i16_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv16i16_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv16i16( %a, %b, i32 0) + ret %res +} + +define @splice_nxv16i16_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv16i16_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e16, m4, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, tu, mu +; CHECK-NEXT: vslideup.vi v8, v12, 1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv16i16( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv16i16_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv16i16_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; 
CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -32 +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, tu, mu +; CHECK-NEXT: vslideup.vx v8, v12, a1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv16i16( %a, %b, i32 -32) + ret %res +} + +define @splice_nxv16i16_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv16i16_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -31 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 31 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, tu, mu +; CHECK-NEXT: vslideup.vx v8, v12, a0 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv16i16( %a, %b, i32 31) + ret %res +} + +declare @llvm.experimental.vector.splice.nxv32i16(, , i32) + +define @splice_nxv32i16_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv32i16_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv32i16( %a, %b, i32 0) + ret %res +} + +define @splice_nxv32i16_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv32i16_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e16, m8, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m8, tu, mu +; CHECK-NEXT: vslideup.vi v8, v16, 1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv32i16( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv32i16_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv32i16_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -64 +; CHECK-NEXT: li a1, 64 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m8, 
tu, mu +; CHECK-NEXT: vslideup.vx v8, v16, a1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv32i16( %a, %b, i32 -64) + ret %res +} + +define @splice_nxv32i16_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv32i16_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -63 +; CHECK-NEXT: li a1, 63 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a1 +; CHECK-NEXT: vsetvli a1, zero, e16, m8, tu, mu +; CHECK-NEXT: vslideup.vx v8, v16, a0 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv32i16( %a, %b, i32 63) + ret %res +} + +declare @llvm.experimental.vector.splice.nxv1i32(, , i32) + +define @splice_nxv1i32_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv1i32_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv1i32( %a, %b, i32 0) + ret %res +} + +define @splice_nxv1i32_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv1i32_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, tu, mu +; CHECK-NEXT: vslideup.vi v8, v9, 1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv1i32( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv1i32_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv1i32_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: addi a0, a0, -2 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, tu, mu +; CHECK-NEXT: vslideup.vi v8, v9, 2 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv1i32( %a, %b, i32 -2) + ret %res +} + +define @splice_nxv1i32_offset_max( %a, %b) #0 { +; CHECK-LABEL: 
splice_nxv1i32_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, tu, mu +; CHECK-NEXT: vslideup.vx v8, v9, a0 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv1i32( %a, %b, i32 1) + ret %res +} + +declare @llvm.experimental.vector.splice.nxv2i32(, , i32) + +define @splice_nxv2i32_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv2i32_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv2i32( %a, %b, i32 0) + ret %res +} + +define @splice_nxv2i32_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv2i32_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, m1, tu, mu +; CHECK-NEXT: vslideup.vi v8, v9, 1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv2i32( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv2i32_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv2i32_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -4 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, m1, tu, mu +; CHECK-NEXT: vslideup.vi v8, v9, 4 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv2i32( %a, %b, i32 -4) + ret %res +} + +define @splice_nxv2i32_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv2i32_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -3 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 3 +; CHECK-NEXT: vsetvli a1, zero, 
e32, m1, tu, mu +; CHECK-NEXT: vslideup.vx v8, v9, a0 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv2i32( %a, %b, i32 3) + ret %res +} + +declare @llvm.experimental.vector.splice.nxv4i32(, , i32) + +define @splice_nxv4i32_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv4i32_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv4i32( %a, %b, i32 0) + ret %res +} + +define @splice_nxv4i32_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv4i32_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, tu, mu +; CHECK-NEXT: vslideup.vi v8, v10, 1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv4i32( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv4i32_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv4i32_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -8 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, tu, mu +; CHECK-NEXT: vslideup.vi v8, v10, 8 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv4i32( %a, %b, i32 -8) + ret %res +} + +define @splice_nxv4i32_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv4i32_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -7 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 7 +; CHECK-NEXT: vsetvli a1, zero, e32, m2, tu, mu +; CHECK-NEXT: vslideup.vx v8, v10, a0 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv4i32( %a, %b, i32 7) + ret %res +} + +declare @llvm.experimental.vector.splice.nxv8i32(, , i32) + +define @splice_nxv8i32_offset_zero( 
%a, %b) #0 { +; CHECK-LABEL: splice_nxv8i32_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv8i32( %a, %b, i32 0) + ret %res +} + +define @splice_nxv8i32_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv8i32_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e32, m4, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, tu, mu +; CHECK-NEXT: vslideup.vi v8, v12, 1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv8i32( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv8i32_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv8i32_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a0, a0, -16 +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, tu, mu +; CHECK-NEXT: vslideup.vi v8, v12, 16 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv8i32( %a, %b, i32 -16) + ret %res +} + +define @splice_nxv8i32_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv8i32_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a0, a0, -15 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 15 +; CHECK-NEXT: vsetvli a1, zero, e32, m4, tu, mu +; CHECK-NEXT: vslideup.vx v8, v12, a0 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv8i32( %a, %b, i32 15) + ret %res +} + +declare @llvm.experimental.vector.splice.nxv16i32(, , i32) + +define @splice_nxv16i32_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv16i32_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv16i32( %a, %b, i32 0) + ret %res +} + +define @splice_nxv16i32_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv16i32_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; 
CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e32, m8, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, m8, tu, mu +; CHECK-NEXT: vslideup.vi v8, v16, 1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv16i32( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv16i32_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv16i32_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -32 +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, m8, tu, mu +; CHECK-NEXT: vslideup.vx v8, v16, a1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv16i32( %a, %b, i32 -32) + ret %res +} + +define @splice_nxv16i32_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv16i32_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -31 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 31 +; CHECK-NEXT: vsetvli a1, zero, e32, m8, tu, mu +; CHECK-NEXT: vslideup.vx v8, v16, a0 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv16i32( %a, %b, i32 31) + ret %res +} + +declare @llvm.experimental.vector.splice.nxv1i64(, , i32) + +define @splice_nxv1i64_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv1i64_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv1i64( %a, %b, i32 0) + ret %res +} + +define @splice_nxv1i64_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv1i64_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e64, m1, tu, mu +; CHECK-NEXT: 
vslideup.vi v8, v9, 1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv1i64( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv1i64_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv1i64_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: addi a0, a0, -2 +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e64, m1, tu, mu +; CHECK-NEXT: vslideup.vi v8, v9, 2 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv1i64( %a, %b, i32 -2) + ret %res +} + +define @splice_nxv1i64_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv1i64_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vsetvli a1, zero, e64, m1, tu, mu +; CHECK-NEXT: vslideup.vx v8, v9, a0 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv1i64( %a, %b, i32 1) + ret %res +} + +declare @llvm.experimental.vector.splice.nxv2i64(, , i32) + +define @splice_nxv2i64_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv2i64_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv2i64( %a, %b, i32 0) + ret %res +} + +define @splice_nxv2i64_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv2i64_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e64, m2, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e64, m2, tu, mu +; CHECK-NEXT: vslideup.vi v8, v10, 1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv2i64( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv2i64_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv2i64_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr 
a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -4 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e64, m2, tu, mu +; CHECK-NEXT: vslideup.vi v8, v10, 4 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv2i64( %a, %b, i32 -4) + ret %res +} + +define @splice_nxv2i64_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv2i64_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -3 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 3 +; CHECK-NEXT: vsetvli a1, zero, e64, m2, tu, mu +; CHECK-NEXT: vslideup.vx v8, v10, a0 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv2i64( %a, %b, i32 3) + ret %res +} + +declare @llvm.experimental.vector.splice.nxv4i64(, , i32) + +define @splice_nxv4i64_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv4i64_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv4i64( %a, %b, i32 0) + ret %res +} + +define @splice_nxv4i64_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv4i64_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e64, m4, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e64, m4, tu, mu +; CHECK-NEXT: vslideup.vi v8, v12, 1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv4i64( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv4i64_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv4i64_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -8 +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e64, m4, tu, mu +; CHECK-NEXT: vslideup.vi v8, v12, 8 +; 
CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv4i64( %a, %b, i32 -8) + ret %res +} + +define @splice_nxv4i64_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv4i64_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -7 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 7 +; CHECK-NEXT: vsetvli a1, zero, e64, m4, tu, mu +; CHECK-NEXT: vslideup.vx v8, v12, a0 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv4i64( %a, %b, i32 7) + ret %res +} + +declare @llvm.experimental.vector.splice.nxv8i64(, , i32) + +define @splice_nxv8i64_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv8i64_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv8i64( %a, %b, i32 0) + ret %res +} + +define @splice_nxv8i64_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv8i64_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e64, m8, tu, mu +; CHECK-NEXT: vslideup.vi v8, v16, 1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv8i64( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv8i64_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv8i64_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a0, a0, -16 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e64, m8, tu, mu +; CHECK-NEXT: vslideup.vi v8, v16, 16 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv8i64( %a, %b, i32 -16) + ret %res +} + +define @splice_nxv8i64_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv8i64_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a0, a0, -15 +; CHECK-NEXT: vsetvli zero, a0, e64, 
m8, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 15 +; CHECK-NEXT: vsetvli a1, zero, e64, m8, tu, mu +; CHECK-NEXT: vslideup.vx v8, v16, a0 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv8i64( %a, %b, i32 15) + ret %res +} + +declare @llvm.experimental.vector.splice.nxv1f16(, , i32) + +define @splice_nxv1f16_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv1f16_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv1f16( %a, %b, i32 0) + ret %res +} + +define @splice_nxv1f16_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv1f16_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, tu, mu +; CHECK-NEXT: vslideup.vi v8, v9, 1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv1f16( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv1f16_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv1f16_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: addi a0, a0, -2 +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, tu, mu +; CHECK-NEXT: vslideup.vi v8, v9, 2 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv1f16( %a, %b, i32 -2) + ret %res +} + +define @splice_nxv1f16_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv1f16_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, tu, mu +; CHECK-NEXT: vslideup.vx v8, v9, a0 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv1f16( %a, %b, i32 1) + ret %res +} + +declare 
@llvm.experimental.vector.splice.nxv2f16(, , i32) + +define @splice_nxv2f16_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv2f16_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv2f16( %a, %b, i32 0) + ret %res +} + +define @splice_nxv2f16_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv2f16_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, tu, mu +; CHECK-NEXT: vslideup.vi v8, v9, 1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv2f16( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv2f16_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv2f16_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -4 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, tu, mu +; CHECK-NEXT: vslideup.vi v8, v9, 4 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv2f16( %a, %b, i32 -4) + ret %res +} + +define @splice_nxv2f16_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv2f16_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -3 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 3 +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, tu, mu +; CHECK-NEXT: vslideup.vx v8, v9, a0 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv2f16( %a, %b, i32 3) + ret %res +} + +declare @llvm.experimental.vector.splice.nxv4f16(, , i32) + +define @splice_nxv4f16_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv4f16_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv4f16( %a, %b, i32 0) + 
ret %res +} + +define @splice_nxv4f16_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv4f16_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, tu, mu +; CHECK-NEXT: vslideup.vi v8, v9, 1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv4f16( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv4f16_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv4f16_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -8 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, tu, mu +; CHECK-NEXT: vslideup.vi v8, v9, 8 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv4f16( %a, %b, i32 -8) + ret %res +} + +define @splice_nxv4f16_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv4f16_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -7 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 7 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, tu, mu +; CHECK-NEXT: vslideup.vx v8, v9, a0 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv4f16( %a, %b, i32 7) + ret %res +} + +declare @llvm.experimental.vector.splice.nxv8f16(, , i32) + +define @splice_nxv8f16_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv8f16_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv8f16( %a, %b, i32 0) + ret %res +} + +define @splice_nxv8f16_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv8f16_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e16, m2, ta, mu +; CHECK-NEXT: 
vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m2, tu, mu +; CHECK-NEXT: vslideup.vi v8, v10, 1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv8f16( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv8f16_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv8f16_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a0, a0, -16 +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m2, tu, mu +; CHECK-NEXT: vslideup.vi v8, v10, 16 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv8f16( %a, %b, i32 -16) + ret %res +} + +define @splice_nxv8f16_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv8f16_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a0, a0, -15 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 15 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, tu, mu +; CHECK-NEXT: vslideup.vx v8, v10, a0 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv8f16( %a, %b, i32 15) + ret %res +} + +declare @llvm.experimental.vector.splice.nxv16f16(, , i32) + +define @splice_nxv16f16_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv16f16_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv16f16( %a, %b, i32 0) + ret %res +} + +define @splice_nxv16f16_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv16f16_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e16, m4, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, tu, mu +; CHECK-NEXT: vslideup.vi v8, v12, 1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv16f16( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv16f16_offset_min( %a, %b) #0 { +; CHECK-LABEL: 
splice_nxv16f16_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -32 +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, tu, mu +; CHECK-NEXT: vslideup.vx v8, v12, a1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv16f16( %a, %b, i32 -32) + ret %res +} + +define @splice_nxv16f16_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv16f16_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -31 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 31 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, tu, mu +; CHECK-NEXT: vslideup.vx v8, v12, a0 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv16f16( %a, %b, i32 31) + ret %res +} + +declare @llvm.experimental.vector.splice.nxv32f16(, , i32) + +define @splice_nxv32f16_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv32f16_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv32f16( %a, %b, i32 0) + ret %res +} + +define @splice_nxv32f16_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv32f16_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e16, m8, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m8, tu, mu +; CHECK-NEXT: vslideup.vi v8, v16, 1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv32f16( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv32f16_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv32f16_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -64 +; CHECK-NEXT: li a1, 64 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu +; 
CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m8, tu, mu +; CHECK-NEXT: vslideup.vx v8, v16, a1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv32f16( %a, %b, i32 -64) + ret %res +} + +define @splice_nxv32f16_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv32f16_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -63 +; CHECK-NEXT: li a1, 63 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a1 +; CHECK-NEXT: vsetvli a1, zero, e16, m8, tu, mu +; CHECK-NEXT: vslideup.vx v8, v16, a0 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv32f16( %a, %b, i32 63) + ret %res +} + +declare @llvm.experimental.vector.splice.nxv1f32(, , i32) + +define @splice_nxv1f32_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv1f32_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv1f32( %a, %b, i32 0) + ret %res +} + +define @splice_nxv1f32_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv1f32_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, tu, mu +; CHECK-NEXT: vslideup.vi v8, v9, 1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv1f32( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv1f32_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv1f32_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: addi a0, a0, -2 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, tu, mu +; CHECK-NEXT: vslideup.vi v8, v9, 2 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv1f32( %a, %b, i32 -2) + ret 
%res +} + +define @splice_nxv1f32_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv1f32_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, tu, mu +; CHECK-NEXT: vslideup.vx v8, v9, a0 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv1f32( %a, %b, i32 1) + ret %res +} + +declare @llvm.experimental.vector.splice.nxv2f32(, , i32) + +define @splice_nxv2f32_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv2f32_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv2f32( %a, %b, i32 0) + ret %res +} + +define @splice_nxv2f32_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv2f32_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, m1, tu, mu +; CHECK-NEXT: vslideup.vi v8, v9, 1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv2f32( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv2f32_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv2f32_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -4 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, m1, tu, mu +; CHECK-NEXT: vslideup.vi v8, v9, 4 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv2f32( %a, %b, i32 -4) + ret %res +} + +define @splice_nxv2f32_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv2f32_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -3 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu 
+; CHECK-NEXT: vslidedown.vi v8, v8, 3 +; CHECK-NEXT: vsetvli a1, zero, e32, m1, tu, mu +; CHECK-NEXT: vslideup.vx v8, v9, a0 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv2f32( %a, %b, i32 3) + ret %res +} + +declare @llvm.experimental.vector.splice.nxv4f32(, , i32) + +define @splice_nxv4f32_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv4f32_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv4f32( %a, %b, i32 0) + ret %res +} + +define @splice_nxv4f32_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv4f32_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, tu, mu +; CHECK-NEXT: vslideup.vi v8, v10, 1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv4f32( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv4f32_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv4f32_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -8 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, tu, mu +; CHECK-NEXT: vslideup.vi v8, v10, 8 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv4f32( %a, %b, i32 -8) + ret %res +} + +define @splice_nxv4f32_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv4f32_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -7 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 7 +; CHECK-NEXT: vsetvli a1, zero, e32, m2, tu, mu +; CHECK-NEXT: vslideup.vx v8, v10, a0 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv4f32( %a, %b, i32 7) + ret %res +} + +declare 
@llvm.experimental.vector.splice.nxv8f32(, , i32) + +define @splice_nxv8f32_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv8f32_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv8f32( %a, %b, i32 0) + ret %res +} + +define @splice_nxv8f32_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv8f32_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e32, m4, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, tu, mu +; CHECK-NEXT: vslideup.vi v8, v12, 1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv8f32( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv8f32_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv8f32_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a0, a0, -16 +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, tu, mu +; CHECK-NEXT: vslideup.vi v8, v12, 16 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv8f32( %a, %b, i32 -16) + ret %res +} + +define @splice_nxv8f32_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv8f32_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a0, a0, -15 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 15 +; CHECK-NEXT: vsetvli a1, zero, e32, m4, tu, mu +; CHECK-NEXT: vslideup.vx v8, v12, a0 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv8f32( %a, %b, i32 15) + ret %res +} + +declare @llvm.experimental.vector.splice.nxv16f32(, , i32) + +define @splice_nxv16f32_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv16f32_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv16f32( %a, %b, i32 0) + ret %res +} + +define @splice_nxv16f32_offset_negone( %a, %b) #0 { +; 
CHECK-LABEL: splice_nxv16f32_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e32, m8, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, m8, tu, mu +; CHECK-NEXT: vslideup.vi v8, v16, 1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv16f32( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv16f32_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv16f32_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -32 +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, m8, tu, mu +; CHECK-NEXT: vslideup.vx v8, v16, a1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv16f32( %a, %b, i32 -32) + ret %res +} + +define @splice_nxv16f32_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv16f32_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -31 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 31 +; CHECK-NEXT: vsetvli a1, zero, e32, m8, tu, mu +; CHECK-NEXT: vslideup.vx v8, v16, a0 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv16f32( %a, %b, i32 31) + ret %res +} + +declare @llvm.experimental.vector.splice.nxv1f64(, , i32) + +define @splice_nxv1f64_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv1f64_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv1f64( %a, %b, i32 0) + ret %res +} + +define @splice_nxv1f64_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv1f64_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu +; CHECK-NEXT: 
vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e64, m1, tu, mu +; CHECK-NEXT: vslideup.vi v8, v9, 1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv1f64( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv1f64_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv1f64_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: addi a0, a0, -2 +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e64, m1, tu, mu +; CHECK-NEXT: vslideup.vi v8, v9, 2 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv1f64( %a, %b, i32 -2) + ret %res +} + +define @splice_nxv1f64_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv1f64_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vsetvli a1, zero, e64, m1, tu, mu +; CHECK-NEXT: vslideup.vx v8, v9, a0 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv1f64( %a, %b, i32 1) + ret %res +} + +declare @llvm.experimental.vector.splice.nxv2f64(, , i32) + +define @splice_nxv2f64_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv2f64_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv2f64( %a, %b, i32 0) + ret %res +} + +define @splice_nxv2f64_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv2f64_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e64, m2, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e64, m2, tu, mu +; CHECK-NEXT: vslideup.vi v8, v10, 1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv2f64( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv2f64_offset_min( %a, %b) 
#0 { +; CHECK-LABEL: splice_nxv2f64_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -4 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e64, m2, tu, mu +; CHECK-NEXT: vslideup.vi v8, v10, 4 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv2f64( %a, %b, i32 -4) + ret %res +} + +define @splice_nxv2f64_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv2f64_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: addi a0, a0, -3 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 3 +; CHECK-NEXT: vsetvli a1, zero, e64, m2, tu, mu +; CHECK-NEXT: vslideup.vx v8, v10, a0 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv2f64( %a, %b, i32 3) + ret %res +} + +declare @llvm.experimental.vector.splice.nxv4f64(, , i32) + +define @splice_nxv4f64_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv4f64_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv4f64( %a, %b, i32 0) + ret %res +} + +define @splice_nxv4f64_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv4f64_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e64, m4, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e64, m4, tu, mu +; CHECK-NEXT: vslideup.vi v8, v12, 1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv4f64( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv4f64_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv4f64_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -8 +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; 
CHECK-NEXT: vsetvli a0, zero, e64, m4, tu, mu +; CHECK-NEXT: vslideup.vi v8, v12, 8 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv4f64( %a, %b, i32 -8) + ret %res +} + +define @splice_nxv4f64_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv4f64_offset_max: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a0, a0, 1 +; CHECK-NEXT: addi a0, a0, -7 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 7 +; CHECK-NEXT: vsetvli a1, zero, e64, m4, tu, mu +; CHECK-NEXT: vslideup.vx v8, v12, a0 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv4f64( %a, %b, i32 7) + ret %res +} + +declare @llvm.experimental.vector.splice.nxv8f64(, , i32) + +define @splice_nxv8f64_offset_zero( %a, %b) #0 { +; CHECK-LABEL: splice_nxv8f64_offset_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv8f64( %a, %b, i32 0) + ret %res +} + +define @splice_nxv8f64_offset_negone( %a, %b) #0 { +; CHECK-LABEL: splice_nxv8f64_offset_negone: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e64, m8, tu, mu +; CHECK-NEXT: vslideup.vi v8, v16, 1 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv8f64( %a, %b, i32 -1) + ret %res +} + +define @splice_nxv8f64_offset_min( %a, %b) #0 { +; CHECK-LABEL: splice_nxv8f64_offset_min: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a0, a0, -16 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e64, m8, tu, mu +; CHECK-NEXT: vslideup.vi v8, v16, 16 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv8f64( %a, %b, i32 -16) + ret %res +} + +define @splice_nxv8f64_offset_max( %a, %b) #0 { +; CHECK-LABEL: splice_nxv8f64_offset_max: +; CHECK: # %bb.0: +; 
CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: addi a0, a0, -15 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 15 +; CHECK-NEXT: vsetvli a1, zero, e64, m8, tu, mu +; CHECK-NEXT: vslideup.vx v8, v16, a0 +; CHECK-NEXT: ret + %res = call @llvm.experimental.vector.splice.nxv8f64( %a, %b, i32 15) + ret %res +} + +attributes #0 = { vscale_range(2,2) } +