diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -15982,6 +15982,80 @@
 """"""""""
 The argument to this intrinsic must be a vector of floating-point values.
 
+'``llvm.vector.insert``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<4 x float> %subvec, <vscale x 4 x float> %vec, i64 %idx)
+      declare <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<2 x double> %subvec, <vscale x 2 x double> %vec, i64 %idx)
+
+Overview:
+"""""""""
+
+The '``llvm.vector.insert.*``' intrinsics insert a subvector into another vector
+at a given index. The return type matches the type of the vector we insert into.
+
+This operation supports inserting a fixed-width vector into a scalable vector,
+but not the other way around.
+
+Arguments:
+""""""""""
+
+The ``subvec`` is the vector that will be inserted.
+The ``vec`` is the vector which ``subvec`` will be inserted into.
+
+``idx`` represents the starting element number at which ``subvec`` will be
+inserted. ``idx`` must be a constant multiple of ``subvec``'s known minimum
+vector length. If ``subvec`` is a scalable vector, ``idx`` is first scaled by
+the runtime scaling factor of ``subvec``. The elements of ``vec`` starting at
+``idx`` are overwritten with ``subvec``. Elements ``idx`` through (``idx`` +
+num_elements(``subvec``) - 1) must be valid ``vec`` indices. If this condition
+cannot be determined statically but is false at runtime, then the result vector
+is undefined.
+
+
+'``llvm.vector.extract``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare <4 x float> @llvm.vector.extract.v4f32.nxv4f32(<vscale x 4 x float> %vec, i64 %idx)
+      declare <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> %vec, i64 %idx)
+
+Overview:
+"""""""""
+
+The '``llvm.vector.extract.*``' intrinsics extract a subvector from another
+vector starting from a given index. The return type must be explicitly
+specified.
+
+This operation supports extracting a fixed-width vector from a scalable vector,
+but not the other way around.
+
+Arguments:
+""""""""""
+
+The ``vec`` is the vector from which we will extract a subvector.
+
+The ``idx`` specifies the starting element number within ``vec`` from which a
+subvector is extracted. ``idx`` must be a constant multiple of the known-minimum
+vector length of the result type. If the result type is a scalable vector,
+``idx`` is first scaled by the result type's runtime scaling factor. Elements
+``idx`` through (``idx`` + num_elements(result_type) - 1) must be valid vector
+indices. If this condition cannot be determined statically but is false at
+runtime, then the result vector is undefined. The ``idx`` parameter must be an
+immediate constant of the vector index type (for most targets this is the
+pointer-sized integer type).
+
 Matrix Intrinsics
 -----------------
 
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1625,6 +1625,15 @@
 //===---------- Intrinsics to query properties of scalable vectors --------===//
 def int_vscale : DefaultAttrsIntrinsic<[llvm_anyint_ty], [], [IntrNoMem]>;
 
+//===---------- Intrinsics to perform subvector insertion/extraction ------===//
+def int_vector_insert : DefaultAttrsIntrinsic<[llvm_anyvector_ty], // result: same type as vec
+                                              [llvm_anyvector_ty, LLVMMatchType<0>, llvm_i64_ty], // subvec, vec, idx
+                                              [IntrNoMem, ImmArg<ArgIndex<2>>]>; // idx (operand 2) must be an immediate
+
+def int_vector_extract : DefaultAttrsIntrinsic<[llvm_anyvector_ty], // result: extracted subvector
+                                               [llvm_anyvector_ty, llvm_i64_ty], // vec, idx
+                                               [IntrNoMem, ImmArg<ArgIndex<1>>]>; // idx (operand 1) must be an immediate
+
 //===----------------------------------------------------------------------===//
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6892,6 +6892,31 @@
                              SetCC));
     return;
   }
+  case Intrinsic::vector_insert: {
+    // Lower llvm.vector.insert(subvec, vec, idx) to ISD::INSERT_SUBVECTOR,
+    // which takes its operands as (vec, subvec, idx) and yields vec's type.
+    const SDLoc Loc = getCurSDLoc();
+    SDValue Sub = getValue(I.getOperand(0));
+    SDValue Dst = getValue(I.getOperand(1));
+    SDValue Idx = getValue(I.getOperand(2));
+    setValue(&I, DAG.getNode(ISD::INSERT_SUBVECTOR, Loc, Dst.getValueType(),
+                             Dst, Sub, Idx));
+    return;
+  }
+  case Intrinsic::vector_extract: {
+    // Lower llvm.vector.extract(vec, idx) to ISD::EXTRACT_SUBVECTOR.
+    auto DL = getCurSDLoc();
+    SDValue Vec = getValue(I.getOperand(0));
+    SDValue Index = getValue(I.getOperand(1));
+
+    // Use the call's own (overloaded) return type: the subvector length is
+    // independent of the source vector's known-minimum element count.
+    EVT ResultVT = DAG.getTargetLoweringInfo().getValueType(
+        DAG.getDataLayout(), I.getType());
+
+    setValue(&I, DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, Index));
+    return;
+  }
   }
 }
 
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-vector.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-extract-vector.ll
@@ -0,0 +1,125 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=0 < %s 2>%t | FileCheck %s
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+; Expected to lower to a bare `ret` (no instructions emitted), since idx is zero.
+define <2 x i64> @extract_v2i64_nxv2i64(<vscale x 2 x i64> %vec) nounwind {
+; CHECK-LABEL: extract_v2i64_nxv2i64:
+; CHECK-NEXT:  ret
+  %retval = call <2 x i64> @llvm.vector.extract.nxv2i64(<vscale x 2 x i64> %vec, i64 0)
+  ret <2 x i64> %retval
+}
+
+; Goes through memory currently; idx != 0. NOTE(review): LangRef requires idx to be a multiple of 2 here - confirm idx 1 is intended.
+define <2 x i64> @extract_v2i64_nxv2i64_idx1(<vscale x 2 x i64> %vec) nounwind {
+; CHECK-LABEL: extract_v2i64_nxv2i64_idx1:
+; CHECK-NEXT:  str [[RR:x[0-9]+]], [sp, #-16]!
+; CHECK-NEXT:  addvl sp, sp, #-1
+; CHECK-NEXT:  cntd [[REG1:x[0-9]+]]
+; CHECK-NEXT:  sub [[REG1]], [[REG1]], #1
+; CHECK-NEXT:  cmp [[REG1]], #1
+; CHECK-NEXT:  ptrue [[PREDICATE_REG:p[0-9]+]].d
+; CHECK-NEXT:  csinc [[REG1]], [[REG1]], xzr, lo
+; CHECK-NEXT:  st1d { z0.d }, [[PREDICATE_REG]], [sp]
+; CHECK-NEXT:  lsl [[REG1]], [[REG1]], #3
+; CHECK-NEXT:  mov [[REG2:x[0-9]+]], sp
+; CHECK-NEXT:  ldr q0, [{{.*}}[[REG2]], [[REG1]]]
+; CHECK-NEXT:  addvl sp, sp, #1
+; CHECK-NEXT:  ldr [[RR]], [sp], #16
+; CHECK-NEXT:  ret
+  %retval = call <2 x i64> @llvm.vector.extract.nxv2i64(<vscale x 2 x i64> %vec, i64 1)
+  ret <2 x i64> %retval
+}
+
+; Expected to lower to a bare `ret` (no instructions emitted), since idx is zero.
+define <4 x i32> @extract_v4i32_nxv4i32(<vscale x 4 x i32> %vec) nounwind {
+; CHECK-LABEL: extract_v4i32_nxv4i32:
+; CHECK-NEXT:  ret
+  %retval = call <4 x i32> @llvm.vector.extract.nxv4i32(<vscale x 4 x i32> %vec, i64 0)
+  ret <4 x i32> %retval
+}
+
+; Goes through memory currently; idx != 0. NOTE(review): LangRef requires idx to be a multiple of 4 here - confirm idx 1 is intended.
+define <4 x i32> @extract_v4i32_nxv4i32_idx1(<vscale x 4 x i32> %vec) nounwind {
+; CHECK-LABEL: extract_v4i32_nxv4i32_idx1:
+; CHECK-NEXT:  str [[RR:x[0-9]+]], [sp, #-16]!
+; CHECK-NEXT:  addvl sp, sp, #-1
+; CHECK-NEXT:  cntw [[REG1:x[0-9]+]]
+; CHECK-NEXT:  sub [[REG1]], [[REG1]], #1
+; CHECK-NEXT:  cmp [[REG1]], #1
+; CHECK-NEXT:  ptrue [[PREDICATE_REG:p[0-9]+]].s
+; CHECK-NEXT:  csinc [[REG1]], [[REG1]], xzr, lo
+; CHECK-NEXT:  st1w { z0.s }, [[PREDICATE_REG]], [sp]
+; CHECK-NEXT:  lsl [[REG1]], [[REG1]], #2
+; CHECK-NEXT:  mov [[REG2:x[0-9]+]], sp
+; CHECK-NEXT:  ldr q0, [{{.*}}[[REG2]], [[REG1]]]
+; CHECK-NEXT:  addvl sp, sp, #1
+; CHECK-NEXT:  ldr [[RR]], [sp], #16
+; CHECK-NEXT:  ret
+  %retval = call <4 x i32> @llvm.vector.extract.nxv4i32(<vscale x 4 x i32> %vec, i64 1)
+  ret <4 x i32> %retval
+}
+
+; Expected to lower to a bare `ret` (no instructions emitted), since idx is zero.
+define <8 x i16> @extract_v8i16_nxv8i16(<vscale x 8 x i16> %vec) nounwind {
+; CHECK-LABEL: extract_v8i16_nxv8i16:
+; CHECK-NEXT:  ret
+  %retval = call <8 x i16> @llvm.vector.extract.nxv8i16(<vscale x 8 x i16> %vec, i64 0)
+  ret <8 x i16> %retval
+}
+
+; Goes through memory currently; idx != 0. NOTE(review): LangRef requires idx to be a multiple of 8 here - confirm idx 1 is intended.
+define <8 x i16> @extract_v8i16_nxv8i16_idx1(<vscale x 8 x i16> %vec) nounwind {
+; CHECK-LABEL: extract_v8i16_nxv8i16_idx1:
+; CHECK-NEXT:  str [[RR:x[0-9]+]], [sp, #-16]!
+; CHECK-NEXT:  addvl sp, sp, #-1
+; CHECK-NEXT:  cnth [[REG1:x[0-9]+]]
+; CHECK-NEXT:  sub [[REG1]], [[REG1]], #1
+; CHECK-NEXT:  cmp [[REG1]], #1
+; CHECK-NEXT:  ptrue [[PREDICATE_REG:p[0-9]+]].h
+; CHECK-NEXT:  csinc [[REG1]], [[REG1]], xzr, lo
+; CHECK-NEXT:  st1h { z0.h }, [[PREDICATE_REG]], [sp]
+; CHECK-NEXT:  lsl [[REG1]], [[REG1]], #1
+; CHECK-NEXT:  mov [[REG2:x[0-9]+]], sp
+; CHECK-NEXT:  ldr q0, [{{.*}}[[REG2]], [[REG1]]]
+; CHECK-NEXT:  addvl sp, sp, #1
+; CHECK-NEXT:  ldr [[RR]], [sp], #16
+; CHECK-NEXT:  ret
+  %retval = call <8 x i16> @llvm.vector.extract.nxv8i16(<vscale x 8 x i16> %vec, i64 1)
+  ret <8 x i16> %retval
+}
+
+; Expected to lower to a bare `ret` (no instructions emitted), since idx is zero.
+define <16 x i8> @extract_v16i8_nxv16i8(<vscale x 16 x i8> %vec) nounwind {
+; CHECK-LABEL: extract_v16i8_nxv16i8:
+; CHECK-NEXT:  ret
+  %retval = call <16 x i8> @llvm.vector.extract.nxv16i8(<vscale x 16 x i8> %vec, i64 0)
+  ret <16 x i8> %retval
+}
+
+; Goes through memory currently; idx != 0. NOTE(review): LangRef requires idx to be a multiple of 16 here - confirm idx 1 is intended.
+define <16 x i8> @extract_v16i8_nxv16i8_idx1(<vscale x 16 x i8> %vec) nounwind {
+; CHECK-LABEL: extract_v16i8_nxv16i8_idx1:
+; CHECK-NEXT:  str [[RR:x[0-9]+]], [sp, #-16]!
+; CHECK-NEXT:  addvl sp, sp, #-1
+; CHECK-NEXT:  rdvl [[REG1:x[0-9]+]], #1
+; CHECK-NEXT:  sub [[REG1]], [[REG1]], #1
+; CHECK-NEXT:  ptrue [[PREDICATE_REG:p[0-9]+]].b
+; CHECK-NEXT:  cmp [[REG1]], #1
+; CHECK-NEXT:  st1b { z0.b }, [[PREDICATE_REG]], [sp]
+; CHECK-NEXT:  csinc [[REG1]], [[REG1]], xzr, lo
+; CHECK-NEXT:  mov [[REG2:x[0-9]+]], sp
+; CHECK-NEXT:  ldr q0, [{{.*}}[[REG2]], [[REG1]]]
+; CHECK-NEXT:  addvl sp, sp, #1
+; CHECK-NEXT:  ldr [[RR]], [sp], #16
+; CHECK-NEXT:  ret
+  %retval = call <16 x i8> @llvm.vector.extract.nxv16i8(<vscale x 16 x i8> %vec, i64 1)
+  ret <16 x i8> %retval
+}
+
+declare <2 x i64> @llvm.vector.extract.nxv2i64(<vscale x 2 x i64>, i64)
+declare <4 x i32> @llvm.vector.extract.nxv4i32(<vscale x 4 x i32>, i64)
+declare <8 x i16> @llvm.vector.extract.nxv8i16(<vscale x 8 x i16>, i64)
+declare <16 x i8> @llvm.vector.extract.nxv16i8(<vscale x 16 x i8>, i64)
diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
@@ -0,0 +1,179 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=0 < %s 2>%t | FileCheck %s
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+define <vscale x 2 x i64> @insert_v2i64_nxv2i64(<2 x i64> %subvec, <vscale x 2 x i64> %vec) nounwind {
+; CHECK-LABEL: insert_v2i64_nxv2i64:
+; CHECK-NEXT:  str [[RR:x[0-9]+]], [sp, #-16]!
+; CHECK-NEXT:  addvl sp, sp, #-1
+; CHECK-NEXT:  cntd [[REG1:x[0-9]+]]
+; CHECK-NEXT:  sub [[REG1]], [[REG1]], #1
+; CHECK-NEXT:  cmp [[REG1]], #0
+; CHECK-NEXT:  csel [[REG1]], [[REG1]], xzr, lo
+; CHECK-NEXT:  ptrue [[PREDICATE_REG:p[0-9]+]].d
+; CHECK-NEXT:  lsl [[REG1]], [[REG1]], #3
+; CHECK-NEXT:  mov [[REG2:x[0-9]+]], sp
+; CHECK-NEXT:  st1d { z1.d }, [[PREDICATE_REG]], [sp]
+; CHECK-NEXT:  str q0, [{{.*}}[[REG2]], [[REG1]]]
+; CHECK-NEXT:  ld1d { z0.d }, [[PREDICATE_REG]]/z, [sp]
+; CHECK-NEXT:  addvl sp, sp, #1
+; CHECK-NEXT:  ldr [[RR]], [sp], #16
+; CHECK-NEXT:  ret
+  %retval = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64(<2 x i64> %subvec, <vscale x 2 x i64> %vec, i64 0)
+  ret <vscale x 2 x i64> %retval
+}
+
+define <vscale x 2 x i64> @insert_v2i64_nxv2i64_idx1(<2 x i64> %subvec, <vscale x 2 x i64> %vec) nounwind {
+; CHECK-LABEL: insert_v2i64_nxv2i64_idx1:
+; CHECK-NEXT:  str [[RR:x[0-9]+]], [sp, #-16]!
+; CHECK-NEXT:  addvl sp, sp, #-1
+; CHECK-NEXT:  cntd [[REG1:x[0-9]+]]
+; CHECK-NEXT:  sub [[REG1]], [[REG1]], #1
+; CHECK-NEXT:  cmp [[REG1]], #1
+; CHECK-NEXT:  csinc [[REG1]], [[REG1]], xzr, lo
+; CHECK-NEXT:  ptrue [[PREDICATE_REG:p[0-9]+]].d
+; CHECK-NEXT:  lsl [[REG1]], [[REG1]], #3
+; CHECK-NEXT:  mov [[REG2:x[0-9]+]], sp
+; CHECK-NEXT:  st1d { z1.d }, [[PREDICATE_REG]], [sp]
+; CHECK-NEXT:  str q0, [{{.*}}[[REG2]], [[REG1]]]
+; CHECK-NEXT:  ld1d { z0.d }, [[PREDICATE_REG]]/z, [sp]
+; CHECK-NEXT:  addvl sp, sp, #1
+; CHECK-NEXT:  ldr [[RR]], [sp], #16
+; CHECK-NEXT:  ret
+  %retval = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64(<2 x i64> %subvec, <vscale x 2 x i64> %vec, i64 1)
+  ret <vscale x 2 x i64> %retval
+}
+
+
+define <vscale x 4 x i32> @insert_v4i32_nxv4i32(<4 x i32> %subvec, <vscale x 4 x i32> %vec) nounwind {
+; CHECK-LABEL: insert_v4i32_nxv4i32:
+; CHECK-NEXT:  str [[RR:x[0-9]+]], [sp, #-16]!
+; CHECK-NEXT:  addvl sp, sp, #-1
+; CHECK-NEXT:  cntw [[REG1:x[0-9]+]]
+; CHECK-NEXT:  sub [[REG1]], [[REG1]], #1
+; CHECK-NEXT:  cmp [[REG1]], #0
+; CHECK-NEXT:  csel [[REG1]], [[REG1]], xzr, lo
+; CHECK-NEXT:  ptrue [[PREDICATE_REG:p[0-9]+]].s
+; CHECK-NEXT:  lsl [[REG1]], [[REG1]], #2
+; CHECK-NEXT:  mov [[REG2:x[0-9]+]], sp
+; CHECK-NEXT:  st1w { z1.s }, [[PREDICATE_REG]], [sp]
+; CHECK-NEXT:  str q0, [{{.*}}[[REG2]], [[REG1]]]
+; CHECK-NEXT:  ld1w { z0.s }, [[PREDICATE_REG]]/z, [sp]
+; CHECK-NEXT:  addvl sp, sp, #1
+; CHECK-NEXT:  ldr [[RR]], [sp], #16
+; CHECK-NEXT:  ret
+  %retval = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32(<4 x i32> %subvec, <vscale x 4 x i32> %vec, i64 0)
+  ret <vscale x 4 x i32> %retval
+}
+
+define <vscale x 4 x i32> @insert_v4i32_nxv4i32_idx1(<4 x i32> %subvec, <vscale x 4 x i32> %vec) nounwind {
+; CHECK-LABEL: insert_v4i32_nxv4i32_idx1:
+; CHECK-NEXT:  str [[RR:x[0-9]+]], [sp, #-16]!
+; CHECK-NEXT:  addvl sp, sp, #-1
+; CHECK-NEXT:  cntw [[REG1:x[0-9]+]]
+; CHECK-NEXT:  sub [[REG1]], [[REG1]], #1
+; CHECK-NEXT:  cmp [[REG1]], #1
+; CHECK-NEXT:  csinc [[REG1]], [[REG1]], xzr, lo
+; CHECK-NEXT:  ptrue [[PREDICATE_REG:p[0-9]+]].s
+; CHECK-NEXT:  lsl [[REG1]], [[REG1]], #2
+; CHECK-NEXT:  mov [[REG2:x[0-9]+]], sp
+; CHECK-NEXT:  st1w { z1.s }, [[PREDICATE_REG]], [sp]
+; CHECK-NEXT:  str q0, [{{.*}}[[REG2]], [[REG1]]]
+; CHECK-NEXT:  ld1w { z0.s }, [[PREDICATE_REG]]/z, [sp]
+; CHECK-NEXT:  addvl sp, sp, #1
+; CHECK-NEXT:  ldr [[RR]], [sp], #16
+; CHECK-NEXT:  ret
+  %retval = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32(<4 x i32> %subvec, <vscale x 4 x i32> %vec, i64 1)
+  ret <vscale x 4 x i32> %retval
+}
+
+
+define <vscale x 8 x i16> @insert_v8i16_nxv8i16(<8 x i16> %subvec, <vscale x 8 x i16> %vec) nounwind {
+; CHECK-LABEL: insert_v8i16_nxv8i16:
+; CHECK-NEXT:  str [[RR:x[0-9]+]], [sp, #-16]!
+; CHECK-NEXT:  addvl sp, sp, #-1
+; CHECK-NEXT:  cnth [[REG1:x[0-9]+]]
+; CHECK-NEXT:  sub [[REG1]], [[REG1]], #1
+; CHECK-NEXT:  cmp [[REG1]], #0
+; CHECK-NEXT:  csel [[REG1]], [[REG1]], xzr, lo
+; CHECK-NEXT:  ptrue [[PREDICATE_REG:p[0-9]+]].h
+; CHECK-NEXT:  lsl [[REG1]], [[REG1]], #1
+; CHECK-NEXT:  mov [[REG2:x[0-9]+]], sp
+; CHECK-NEXT:  st1h { z1.h }, [[PREDICATE_REG]], [sp]
+; CHECK-NEXT:  str q0, [{{.*}}[[REG2]], [[REG1]]]
+; CHECK-NEXT:  ld1h { z0.h }, [[PREDICATE_REG]]/z, [sp]
+; CHECK-NEXT:  addvl sp, sp, #1
+; CHECK-NEXT:  ldr [[RR]], [sp], #16
+; CHECK-NEXT:  ret
+  %retval = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16(<8 x i16> %subvec, <vscale x 8 x i16> %vec, i64 0)
+  ret <vscale x 8 x i16> %retval
+}
+
+define <vscale x 8 x i16> @insert_v8i16_nxv8i16_idx1(<8 x i16> %subvec, <vscale x 8 x i16> %vec) nounwind {
+; CHECK-LABEL: insert_v8i16_nxv8i16_idx1:
+; CHECK-NEXT:  str [[RR:x[0-9]+]], [sp, #-16]!
+; CHECK-NEXT:  addvl sp, sp, #-1
+; CHECK-NEXT:  cnth [[REG1:x[0-9]+]]
+; CHECK-NEXT:  sub [[REG1]], [[REG1]], #1
+; CHECK-NEXT:  cmp [[REG1]], #1
+; CHECK-NEXT:  csinc [[REG1]], [[REG1]], xzr, lo
+; CHECK-NEXT:  ptrue [[PREDICATE_REG:p[0-9]+]].h
+; CHECK-NEXT:  lsl [[REG1]], [[REG1]], #1
+; CHECK-NEXT:  mov [[REG2:x[0-9]+]], sp
+; CHECK-NEXT:  st1h { z1.h }, [[PREDICATE_REG]], [sp]
+; CHECK-NEXT:  str q0, [{{.*}}[[REG2]], [[REG1]]]
+; CHECK-NEXT:  ld1h { z0.h }, [[PREDICATE_REG]]/z, [sp]
+; CHECK-NEXT:  addvl sp, sp, #1
+; CHECK-NEXT:  ldr [[RR]], [sp], #16
+; CHECK-NEXT:  ret
+  %retval = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16(<8 x i16> %subvec, <vscale x 8 x i16> %vec, i64 1)
+  ret <vscale x 8 x i16> %retval
+}
+
+
+define <vscale x 16 x i8> @insert_v16i8_nxv16i8(<16 x i8> %subvec, <vscale x 16 x i8> %vec) nounwind {
+; CHECK-LABEL: insert_v16i8_nxv16i8:
+; CHECK-NEXT:  str [[RR:x[0-9]+]], [sp, #-16]!
+; CHECK-NEXT:  addvl sp, sp, #-1
+; CHECK-NEXT:  rdvl [[REG1:x[0-9]+]], #1
+; CHECK-NEXT:  sub [[REG1]], [[REG1]], #1
+; CHECK-NEXT:  cmp [[REG1]], #0
+; CHECK-NEXT:  ptrue [[PREDICATE_REG:p[0-9]+]].b
+; CHECK-NEXT:  csel [[REG1]], [[REG1]], xzr, lo
+; CHECK-NEXT:  mov [[REG2:x[0-9]+]], sp
+; CHECK-NEXT:  st1b { z1.b }, [[PREDICATE_REG]], [sp]
+; CHECK-NEXT:  str q0, [{{.*}}[[REG2]], [[REG1]]]
+; CHECK-NEXT:  ld1b { z0.b }, [[PREDICATE_REG]]/z, [sp]
+; CHECK-NEXT:  addvl sp, sp, #1
+; CHECK-NEXT:  ldr [[RR]], [sp], #16
+; CHECK-NEXT:  ret
+  %retval = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8(<16 x i8> %subvec, <vscale x 16 x i8> %vec, i64 0)
+  ret <vscale x 16 x i8> %retval
+}
+
+define <vscale x 16 x i8> @insert_v16i8_nxv16i8_idx1(<16 x i8> %subvec, <vscale x 16 x i8> %vec) nounwind {
+; CHECK-LABEL: insert_v16i8_nxv16i8_idx1:
+; CHECK-NEXT:  str [[RR:x[0-9]+]], [sp, #-16]!
+; CHECK-NEXT:  addvl sp, sp, #-1
+; CHECK-NEXT:  rdvl [[REG1:x[0-9]+]], #1
+; CHECK-NEXT:  sub [[REG1]], [[REG1]], #1
+; CHECK-NEXT:  cmp [[REG1]], #1
+; CHECK-NEXT:  ptrue [[PREDICATE_REG:p[0-9]+]].b
+; CHECK-NEXT:  csinc [[REG1]], [[REG1]], xzr, lo
+; CHECK-NEXT:  mov [[REG2:x[0-9]+]], sp
+; CHECK-NEXT:  st1b { z1.b }, [[PREDICATE_REG]], [sp]
+; CHECK-NEXT:  str q0, [{{.*}}[[REG2]], [[REG1]]]
+; CHECK-NEXT:  ld1b { z0.b }, [[PREDICATE_REG]]/z, [sp]
+; CHECK-NEXT:  addvl sp, sp, #1
+; CHECK-NEXT:  ldr [[RR]], [sp], #16
+; CHECK-NEXT:  ret
+  %retval = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8(<16 x i8> %subvec, <vscale x 16 x i8> %vec, i64 1)
+  ret <vscale x 16 x i8> %retval
+}
+
+
+declare <vscale x 2 x i64> @llvm.vector.insert.nxv2i64(<2 x i64>, <vscale x 2 x i64>, i64)
+declare <vscale x 4 x i32> @llvm.vector.insert.nxv4i32(<4 x i32>, <vscale x 4 x i32>, i64)
+declare <vscale x 8 x i16> @llvm.vector.insert.nxv8i16(<8 x i16>, <vscale x 8 x i16>, i64)
+declare <vscale x 16 x i8> @llvm.vector.insert.nxv16i8(<16 x i8>, <vscale x 16 x i8>, i64)