diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -15982,6 +15982,80 @@
 """"""""""
 The argument to this intrinsic must be a vector of floating-point values.
 
+'``llvm.vector.insert``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <vscale x 4 x float> @llvm.vector.insert.v4f32(<4 x float> %subvec, <vscale x 4 x float> %vec, i64 %idx)
+      declare <vscale x 2 x double> @llvm.vector.insert.v2f64(<2 x double> %subvec, <vscale x 2 x double> %vec, i64 %idx)
+
+Overview:
+"""""""""
+
+The '``llvm.vector.insert.*``' intrinsics insert a subvector into another vector
+at a given index. The return type matches the type of the vector we insert into.
+
+This operation supports inserting a fixed-width vector into a scalable vector,
+but not the other way around.
+
+Arguments:
+""""""""""
+
+The ``subvec`` is the vector that will be inserted.
+The ``vec`` is the vector which ``subvec`` will be inserted into.
+
+``idx`` represents the starting element number at which ``subvec`` will be
+inserted. ``idx`` must be a constant multiple of ``subvec``'s known minimum
+vector length. If ``subvec`` is a scalable vector, ``idx`` is first scaled by
+the runtime scaling factor of ``subvec``. The elements of ``vec`` starting at
+``idx`` are overwritten with ``subvec``. Elements ``idx`` through (``idx`` +
+num_elements(``subvec``) - 1) must be valid ``vec`` indices. If this condition
+cannot be determined statically but is false at runtime, then the result vector
+is undefined.
+
+
+'``llvm.vector.extract``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <4 x float> @llvm.vector.extract.v4f32(<vscale x 4 x float> %vec, i64 %idx)
+      declare <2 x double> @llvm.vector.extract.v2f64(<vscale x 2 x double> %vec, i64 %idx)
+
+Overview:
+"""""""""
+
+The '``llvm.vector.extract.*``' intrinsics extract a subvector from another
+vector starting from a given index. The return type must be explicitly
+specified.
+
+This operation supports extracting a fixed-width vector from a scalable vector,
+but not the other way around.
+
+Arguments:
+""""""""""
+
+The ``vec`` is the vector from which we will extract a subvector.
+
+The ``idx`` specifies the starting element number within ``vec`` from which a
+subvector is extracted. ``idx`` must be a constant multiple of the known-minimum
+vector length of the result type. If the result type is a scalable vector,
+``idx`` is first scaled by the result type's runtime scaling factor. Elements
+``idx`` through (``idx`` + num_elements(result_type) - 1) must be valid vector
+indices. If this condition cannot be determined statically but is false at
+runtime, then the result vector is undefined. The ``idx`` parameter must be a
+vector index constant type (for most targets this will be an integer pointer
+type).
+
 Matrix Intrinsics
 -----------------
 
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1625,6 +1625,19 @@
 //===---------- Intrinsics to query properties of scalable vectors --------===//
 def int_vscale : DefaultAttrsIntrinsic<[llvm_anyint_ty], [], [IntrNoMem]>;
 
+//===---------- Intrinsics to perform subvector insertion/extraction ------===//
+// Insert the subvector (operand 0) into the vector (operand 1, same type as
+// the result) at the immediate element index given by operand 2.
+def int_vector_insert : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+    [llvm_anyvector_ty, LLVMMatchType<0>, llvm_i64_ty],
+    [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+
+// Extract a subvector of the (overloaded) result type from the vector in
+// operand 0, starting at the immediate element index given by operand 1.
+def int_vector_extract : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+    [llvm_anyvector_ty, llvm_i64_ty],
+    [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+
 //===----------------------------------------------------------------------===//
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6892,6 +6892,38 @@
                              SetCC));
     return;
   }
+  case Intrinsic::vector_insert: {
+    // Lower llvm.vector.insert(subvec, vec, idx) to ISD::INSERT_SUBVECTOR.
+    // The node takes the destination vector first, so the first two
+    // intrinsic operands are swapped when building it.
+    auto DL = getCurSDLoc();
+
+    SDValue SubVec = getValue(I.getOperand(0));
+    SDValue Vec = getValue(I.getOperand(1));
+    SDValue Index = getValue(I.getOperand(2));
+    // The result has the type of the vector being inserted into.
+    EVT ResultVT = Vec.getValueType();
+    setValue(&I, DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResultVT, Vec, SubVec,
+                             Index));
+    return;
+  }
+  case Intrinsic::vector_extract: {
+    // Lower llvm.vector.extract(vec, idx) to ISD::EXTRACT_SUBVECTOR.
+    auto DL = getCurSDLoc();
+
+    SDValue Vec = getValue(I.getOperand(0));
+    SDValue Index = getValue(I.getOperand(1));
+
+    // The result type is overloaded independently of the source vector, so it
+    // must come from the call itself. Deriving it from the source's element
+    // count would produce the wrong type whenever the extracted subvector is
+    // not exactly as wide as the source's known-minimum length.
+    EVT ResultVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+                                                            I.getType());
+
+    setValue(&I, DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, Index));
+    return;
+  }
   }
 }
 
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-vector.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-extract-vector.ll
@@ -0,0 +1,125 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=0 < %s 2>%t | FileCheck %s
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+; Should codegen to a nop, since idx is zero.
+define <2 x i64> @extract_v2i64_nxv2i64(<vscale x 2 x i64> %vec) nounwind {
+; CHECK-LABEL: extract_v2i64_nxv2i64:
+; CHECK-NEXT: ret
+  %retval = call <2 x i64> @llvm.vector.extract.nxv2i64(<vscale x 2 x i64> %vec, i64 0)
+  ret <2 x i64> %retval
+}
+
+; Goes through memory currently; idx != 0.
+define <2 x i64> @extract_v2i64_nxv2i64_idx1(<vscale x 2 x i64> %vec) nounwind {
+; CHECK-LABEL: extract_v2i64_nxv2i64_idx1:
+; CHECK-NEXT: str [[RR:x[0-9]+]], [sp, #-16]!
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: cntd [[REG1:x[0-9]+]]
+; CHECK-NEXT: sub [[REG1]], [[REG1]], #1
+; CHECK-NEXT: cmp [[REG1]], #1
+; CHECK-NEXT: ptrue [[PREDICATE_REG:p[0-9]+]].d
+; CHECK-NEXT: csinc [[REG1]], [[REG1]], xzr, lo
+; CHECK-NEXT: st1d { z0.d }, [[PREDICATE_REG]], [sp]
+; CHECK-NEXT: lsl [[REG1]], [[REG1]], #3
+; CHECK-NEXT: mov [[REG2:x[0-9]+]], sp
+; CHECK-NEXT: ldr q0, [{{.*}}[[REG2]], [[REG1]]]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldr [[RR]], [sp], #16
+; CHECK-NEXT: ret
+%retval = call <2 x i64> @llvm.vector.extract.nxv2i64(<vscale x 2 x i64> %vec, i64 1)
+ret <2 x i64> %retval
+}
+
+; Should codegen to a nop, since idx is zero.
+define <4 x i32> @extract_v4i32_nxv4i32(<vscale x 4 x i32> %vec) nounwind {
+; CHECK-LABEL: extract_v4i32_nxv4i32:
+; CHECK-NEXT: ret
+%retval = call <4 x i32> @llvm.vector.extract.nxv4i32(<vscale x 4 x i32> %vec, i64 0)
+ret <4 x i32> %retval
+}
+
+; Goes through memory currently; idx != 0.
+define <4 x i32> @extract_v4i32_nxv4i32_idx1(<vscale x 4 x i32> %vec) nounwind {
+; CHECK-LABEL: extract_v4i32_nxv4i32_idx1:
+; CHECK-NEXT: str [[RR:x[0-9]+]], [sp, #-16]!
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: cntw [[REG1:x[0-9]+]]
+; CHECK-NEXT: sub [[REG1]], [[REG1]], #1
+; CHECK-NEXT: cmp [[REG1]], #1
+; CHECK-NEXT: ptrue [[PREDICATE_REG:p[0-9]+]].s
+; CHECK-NEXT: csinc [[REG1]], [[REG1]], xzr, lo
+; CHECK-NEXT: st1w { z0.s }, [[PREDICATE_REG]], [sp]
+; CHECK-NEXT: lsl [[REG1]], [[REG1]], #2
+; CHECK-NEXT: mov [[REG2:x[0-9]+]], sp
+; CHECK-NEXT: ldr q0, [{{.*}}[[REG2]], [[REG1]]]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldr [[RR]], [sp], #16
+; CHECK-NEXT: ret
+  %retval = call <4 x i32> @llvm.vector.extract.nxv4i32(<vscale x 4 x i32> %vec, i64 1)
+  ret <4 x i32> %retval
+}
+
+; Should codegen to a nop, since idx is zero.
+define <8 x i16> @extract_v8i16_nxv8i16(<vscale x 8 x i16> %vec) nounwind {
+; CHECK-LABEL: extract_v8i16_nxv8i16:
+; CHECK-NEXT: ret
+  %retval = call <8 x i16> @llvm.vector.extract.nxv8i16(<vscale x 8 x i16> %vec, i64 0)
+  ret <8 x i16> %retval
+}
+
+; Goes through memory currently; idx != 0.
+define <8 x i16> @extract_v8i16_nxv8i16_idx1(<vscale x 8 x i16> %vec) nounwind {
+; CHECK-LABEL: extract_v8i16_nxv8i16_idx1:
+; CHECK-NEXT: str [[RR:x[0-9]+]], [sp, #-16]!
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: cnth [[REG1:x[0-9]+]]
+; CHECK-NEXT: sub [[REG1]], [[REG1]], #1
+; CHECK-NEXT: cmp [[REG1]], #1
+; CHECK-NEXT: ptrue [[PREDICATE_REG:p[0-9]+]].h
+; CHECK-NEXT: csinc [[REG1]], [[REG1]], xzr, lo
+; CHECK-NEXT: st1h { z0.h }, [[PREDICATE_REG]], [sp]
+; CHECK-NEXT: lsl [[REG1]], [[REG1]], #1
+; CHECK-NEXT: mov [[REG2:x[0-9]+]], sp
+; CHECK-NEXT: ldr q0, [{{.*}}[[REG2]], [[REG1]]]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldr [[RR]], [sp], #16
+; CHECK-NEXT: ret
+  %retval = call <8 x i16> @llvm.vector.extract.nxv8i16(<vscale x 8 x i16> %vec, i64 1)
+  ret <8 x i16> %retval
+}
+
+; Should codegen to a nop, since idx is zero.
+define <16 x i8> @extract_v16i8_nxv16i8(<vscale x 16 x i8> %vec) nounwind {
+; CHECK-LABEL: extract_v16i8_nxv16i8:
+; CHECK-NEXT: ret
+  %retval = call <16 x i8> @llvm.vector.extract.nxv16i8(<vscale x 16 x i8> %vec, i64 0)
+  ret <16 x i8> %retval
+}
+
+; Goes through memory currently; idx != 0.
+define <16 x i8> @extract_v16i8_nxv16i8_idx1(<vscale x 16 x i8> %vec) nounwind {
+; CHECK-LABEL: extract_v16i8_nxv16i8_idx1:
+; CHECK-NEXT: str [[RR:x[0-9]+]], [sp, #-16]!
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: rdvl [[REG1:x[0-9]+]], #1
+; CHECK-NEXT: sub [[REG1]], [[REG1]], #1
+; CHECK-NEXT: ptrue [[PREDICATE_REG:p[0-9]+]].b
+; CHECK-NEXT: cmp [[REG1]], #1
+; CHECK-NEXT: st1b { z0.b }, [[PREDICATE_REG]], [sp]
+; CHECK-NEXT: csinc [[REG1]], [[REG1]], xzr, lo
+; CHECK-NEXT: mov [[REG2:x[0-9]+]], sp
+; CHECK-NEXT: ldr q0, [{{.*}}[[REG2]], [[REG1]]]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldr [[RR]], [sp], #16
+; CHECK-NEXT: ret
+  %retval = call <16 x i8> @llvm.vector.extract.nxv16i8(<vscale x 16 x i8> %vec, i64 1)
+  ret <16 x i8> %retval
+}
+
+declare <2 x i64> @llvm.vector.extract.nxv2i64(<vscale x 2 x i64>, i64)
+declare <4 x i32> @llvm.vector.extract.nxv4i32(<vscale x 4 x i32>, i64)
+declare <8 x i16> @llvm.vector.extract.nxv8i16(<vscale x 8 x i16>, i64)
+declare <16 x i8> @llvm.vector.extract.nxv16i8(<vscale x 16 x i8>, i64)
diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
@@ -0,0 +1,179 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=0 < %s 2>%t | FileCheck %s
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+define <vscale x 2 x i64> @insert_v2i64_nxv2i64(<2 x i64> %subvec, <vscale x 2 x i64> %vec) nounwind {
+; CHECK-LABEL: insert_v2i64_nxv2i64:
+; CHECK-NEXT: str [[RR:x[0-9]+]], [sp, #-16]!
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: cntd [[REG1:x[0-9]+]]
+; CHECK-NEXT: sub [[REG1]], [[REG1]], #1
+; CHECK-NEXT: cmp [[REG1]], #0
+; CHECK-NEXT: csel [[REG1]], [[REG1]], xzr, lo
+; CHECK-NEXT: ptrue [[PREDICATE_REG:p[0-9]+]].d
+; CHECK-NEXT: lsl [[REG1]], [[REG1]], #3
+; CHECK-NEXT: mov [[REG2:x[0-9]+]], sp
+; CHECK-NEXT: st1d { z1.d }, [[PREDICATE_REG]], [sp]
+; CHECK-NEXT: str q0, [{{.*}}[[REG2]], [[REG1]]]
+; CHECK-NEXT: ld1d { z0.d }, [[PREDICATE_REG]]/z, [sp]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldr [[RR]], [sp], #16
+; CHECK-NEXT: ret
+  %retval = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64(<2 x i64> %subvec, <vscale x 2 x i64> %vec, i64 0)
+  ret <vscale x 2 x i64> %retval
+}
+
+define <vscale x 2 x i64> @insert_v2i64_nxv2i64_idx1(<2 x i64> %subvec, <vscale x 2 x i64> %vec) nounwind {
+; CHECK-LABEL: insert_v2i64_nxv2i64_idx1:
+; CHECK-NEXT: str [[RR:x[0-9]+]], [sp, #-16]!
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: cntd [[REG1:x[0-9]+]]
+; CHECK-NEXT: sub [[REG1]], [[REG1]], #1
+; CHECK-NEXT: cmp [[REG1]], #1
+; CHECK-NEXT: csinc [[REG1]], [[REG1]], xzr, lo
+; CHECK-NEXT: ptrue [[PREDICATE_REG:p[0-9]+]].d
+; CHECK-NEXT: lsl [[REG1]], [[REG1]], #3
+; CHECK-NEXT: mov [[REG2:x[0-9]+]], sp
+; CHECK-NEXT: st1d { z1.d }, [[PREDICATE_REG]], [sp]
+; CHECK-NEXT: str q0, [{{.*}}[[REG2]], [[REG1]]]
+; CHECK-NEXT: ld1d { z0.d }, [[PREDICATE_REG]]/z, [sp]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldr [[RR]], [sp], #16
+; CHECK-NEXT: ret
+  %retval = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64(<2 x i64> %subvec, <vscale x 2 x i64> %vec, i64 1)
+  ret <vscale x 2 x i64> %retval
+}
+
+
+define <vscale x 4 x i32> @insert_v4i32_nxv4i32(<4 x i32> %subvec, <vscale x 4 x i32> %vec) nounwind {
+; CHECK-LABEL: insert_v4i32_nxv4i32:
+; CHECK-NEXT: str [[RR:x[0-9]+]], [sp, #-16]!
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: cntw [[REG1:x[0-9]+]]
+; CHECK-NEXT: sub [[REG1]], [[REG1]], #1
+; CHECK-NEXT: cmp [[REG1]], #0
+; CHECK-NEXT: csel [[REG1]], [[REG1]], xzr, lo
+; CHECK-NEXT: ptrue [[PREDICATE_REG:p[0-9]+]].s
+; CHECK-NEXT: lsl [[REG1]], [[REG1]], #2
+; CHECK-NEXT: mov [[REG2:x[0-9]+]], sp
+; CHECK-NEXT: st1w { z1.s }, [[PREDICATE_REG]], [sp]
+; CHECK-NEXT: str q0, [{{.*}}[[REG2]], [[REG1]]]
+; CHECK-NEXT: ld1w { z0.s }, [[PREDICATE_REG]]/z, [sp]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldr [[RR]], [sp], #16
+; CHECK-NEXT: ret
+  %retval = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32(<4 x i32> %subvec, <vscale x 4 x i32> %vec, i64 0)
+  ret <vscale x 4 x i32> %retval
+}
+
+define <vscale x 4 x i32> @insert_v4i32_nxv4i32_idx1(<4 x i32> %subvec, <vscale x 4 x i32> %vec) nounwind {
+; CHECK-LABEL: insert_v4i32_nxv4i32_idx1:
+; CHECK-NEXT: str [[RR:x[0-9]+]], [sp, #-16]!
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: cntw [[REG1:x[0-9]+]]
+; CHECK-NEXT: sub [[REG1]], [[REG1]], #1
+; CHECK-NEXT: cmp [[REG1]], #1
+; CHECK-NEXT: csinc [[REG1]], [[REG1]], xzr, lo
+; CHECK-NEXT: ptrue [[PREDICATE_REG:p[0-9]+]].s
+; CHECK-NEXT: lsl [[REG1]], [[REG1]], #2
+; CHECK-NEXT: mov [[REG2:x[0-9]+]], sp
+; CHECK-NEXT: st1w { z1.s }, [[PREDICATE_REG]], [sp]
+; CHECK-NEXT: str q0, [{{.*}}[[REG2]], [[REG1]]]
+; CHECK-NEXT: ld1w { z0.s }, [[PREDICATE_REG]]/z, [sp]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldr [[RR]], [sp], #16
+; CHECK-NEXT: ret
+  %retval = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32(<4 x i32> %subvec, <vscale x 4 x i32> %vec, i64 1)
+  ret <vscale x 4 x i32> %retval
+}
+
+
+define <vscale x 8 x i16> @insert_v8i16_nxv8i16(<8 x i16> %subvec, <vscale x 8 x i16> %vec) nounwind {
+; CHECK-LABEL: insert_v8i16_nxv8i16:
+; CHECK-NEXT: str [[RR:x[0-9]+]], [sp, #-16]!
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: cnth [[REG1:x[0-9]+]]
+; CHECK-NEXT: sub [[REG1]], [[REG1]], #1
+; CHECK-NEXT: cmp [[REG1]], #0
+; CHECK-NEXT: csel [[REG1]], [[REG1]], xzr, lo
+; CHECK-NEXT: ptrue [[PREDICATE_REG:p[0-9]+]].h
+; CHECK-NEXT: lsl [[REG1]], [[REG1]], #1
+; CHECK-NEXT: mov [[REG2:x[0-9]+]], sp
+; CHECK-NEXT: st1h { z1.h }, [[PREDICATE_REG]], [sp]
+; CHECK-NEXT: str q0, [{{.*}}[[REG2]], [[REG1]]]
+; CHECK-NEXT: ld1h { z0.h }, [[PREDICATE_REG]]/z, [sp]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldr [[RR]], [sp], #16
+; CHECK-NEXT: ret
+  %retval = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16(<8 x i16> %subvec, <vscale x 8 x i16> %vec, i64 0)
+  ret <vscale x 8 x i16> %retval
+}
+
+define <vscale x 8 x i16> @insert_v8i16_nxv8i16_idx1(<8 x i16> %subvec, <vscale x 8 x i16> %vec) nounwind {
+; CHECK-LABEL: insert_v8i16_nxv8i16_idx1:
+; CHECK-NEXT: str [[RR:x[0-9]+]], [sp, #-16]!
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: cnth [[REG1:x[0-9]+]]
+; CHECK-NEXT: sub [[REG1]], [[REG1]], #1
+; CHECK-NEXT: cmp [[REG1]], #1
+; CHECK-NEXT: csinc [[REG1]], [[REG1]], xzr, lo
+; CHECK-NEXT: ptrue [[PREDICATE_REG:p[0-9]+]].h
+; CHECK-NEXT: lsl [[REG1]], [[REG1]], #1
+; CHECK-NEXT: mov [[REG2:x[0-9]+]], sp
+; CHECK-NEXT: st1h { z1.h }, [[PREDICATE_REG]], [sp]
+; CHECK-NEXT: str q0, [{{.*}}[[REG2]], [[REG1]]]
+; CHECK-NEXT: ld1h { z0.h }, [[PREDICATE_REG]]/z, [sp]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldr [[RR]], [sp], #16
+; CHECK-NEXT: ret
+  %retval = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16(<8 x i16> %subvec, <vscale x 8 x i16> %vec, i64 1)
+  ret <vscale x 8 x i16> %retval
+}
+
+
+define <vscale x 16 x i8> @insert_v16i8_nxv16i8(<16 x i8> %subvec, <vscale x 16 x i8> %vec) nounwind {
+; CHECK-LABEL: insert_v16i8_nxv16i8:
+; CHECK-NEXT: str [[RR:x[0-9]+]], [sp, #-16]!
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: rdvl [[REG1:x[0-9]+]], #1
+; CHECK-NEXT: sub [[REG1]], [[REG1]], #1
+; CHECK-NEXT: cmp [[REG1]], #0
+; CHECK-NEXT: ptrue [[PREDICATE_REG:p[0-9]+]].b
+; CHECK-NEXT: csel [[REG1]], [[REG1]], xzr, lo
+; CHECK-NEXT: mov [[REG2:x[0-9]+]], sp
+; CHECK-NEXT: st1b { z1.b }, [[PREDICATE_REG]], [sp]
+; CHECK-NEXT: str q0, [{{.*}}[[REG2]], [[REG1]]]
+; CHECK-NEXT: ld1b { z0.b }, [[PREDICATE_REG]]/z, [sp]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldr [[RR]], [sp], #16
+; CHECK-NEXT: ret
+  %retval = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8(<16 x i8> %subvec, <vscale x 16 x i8> %vec, i64 0)
+  ret <vscale x 16 x i8> %retval
+}
+
+define <vscale x 16 x i8> @insert_v16i8_nxv16i8_idx1(<16 x i8> %subvec, <vscale x 16 x i8> %vec) nounwind {
+; CHECK-LABEL: insert_v16i8_nxv16i8_idx1:
+; CHECK-NEXT: str [[RR:x[0-9]+]], [sp, #-16]!
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: rdvl [[REG1:x[0-9]+]], #1
+; CHECK-NEXT: sub [[REG1]], [[REG1]], #1
+; CHECK-NEXT: cmp [[REG1]], #1
+; CHECK-NEXT: ptrue [[PREDICATE_REG:p[0-9]+]].b
+; CHECK-NEXT: csinc [[REG1]], [[REG1]], xzr, lo
+; CHECK-NEXT: mov [[REG2:x[0-9]+]], sp
+; CHECK-NEXT: st1b { z1.b }, [[PREDICATE_REG]], [sp]
+; CHECK-NEXT: str q0, [{{.*}}[[REG2]], [[REG1]]]
+; CHECK-NEXT: ld1b { z0.b }, [[PREDICATE_REG]]/z, [sp]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: ldr [[RR]], [sp], #16
+; CHECK-NEXT: ret
+  %retval = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8(<16 x i8> %subvec, <vscale x 16 x i8> %vec, i64 1)
+  ret <vscale x 16 x i8> %retval
+}
+
+
+declare <vscale x 2 x i64> @llvm.vector.insert.nxv2i64(<2 x i64>, <vscale x 2 x i64>, i64)
+declare <vscale x 4 x i32> @llvm.vector.insert.nxv4i32(<4 x i32>, <vscale x 4 x i32>, i64)
+declare <vscale x 8 x i16> @llvm.vector.insert.nxv8i16(<8 x i16>, <vscale x 8 x i16>, i64)
+declare <vscale x 16 x i8> @llvm.vector.insert.nxv16i8(<16 x i8>, <vscale x 16 x i8>, i64)