Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -21485,10 +21485,13 @@
   // Try to move vector bitcast after extract_subv by scaling extraction index:
   // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
   if (V.getOpcode() == ISD::BITCAST &&
-      V.getOperand(0).getValueType().isVector() &&
+      V.getOperand(0).getValueType().isFixedLengthVector() &&
       (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
     SDValue SrcOp = V.getOperand(0);
     EVT SrcVT = SrcOp.getValueType();
+    // For scalable vectors, we purposely add the bitcasts, and only deal
+    // with integer extract_subvector. So we don't reorder those particular
+    // bitcasts.
    unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
    unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
    if ((SrcNumElts % DestNumElts) == 0) {
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14928,11 +14928,28 @@
 static SDValue
 performExtractSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
                                SelectionDAG &DAG) {
+  EVT InVT = N->getOperand(0).getValueType();
+  EVT OutVT = N->getValueType(0);
+  SDLoc DL(N);
+  // Reorder when the scalable vector's inner type is floating point and the
+  // outer type is not a scalable vector.
+  if (InVT.isScalableVector() && InVT.isFloatingPoint() &&
+      DCI.isBeforeLegalize() && !OutVT.isScalableVector()) {
+    // Bitcast the input.
+    SDValue VecOp = N->getOperand(0);
+    VecOp = DAG.getNode(ISD::BITCAST, DL, InVT.changeTypeToInteger(), VecOp);
+    // Perform the extract in the integer type.
+    SDValue Extract =
+        DAG.getNode(N->getOpcode(), DL, OutVT.changeTypeToInteger(), VecOp,
+                    N->getOperand(1));
+    // Bitcast back to the FP type.
+    return DAG.getNode(ISD::BITCAST, DL, OutVT, Extract);
+  }
+
   if (DCI.isBeforeLegalizeOps())
     return SDValue();
 
-  EVT VT = N->getValueType(0);
-  if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1)
+  if (!OutVT.isScalableVector() || OutVT.getVectorElementType() != MVT::i1)
     return SDValue();
 
   SDValue V = N->getOperand(0);
@@ -14943,7 +14960,7 @@
   // ty1 extract_vector(ty2 splat(const))) -> ty1 splat(const)
   if (V.getOpcode() == ISD::SPLAT_VECTOR)
     if (isa<ConstantSDNode>(V.getOperand(0)))
-      return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V.getOperand(0));
+      return DAG.getNode(ISD::SPLAT_VECTOR, DL, OutVT, V.getOperand(0));
 
   return SDValue();
 }
Index: llvm/test/CodeGen/AArch64/extract-insert-element-sve.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/extract-insert-element-sve.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64 -mattr=+sve -asm-verbose=1 < %s | FileCheck %s
+
+; Extract from unpacked SVE vectors into different sizes of NEON registers.
+
+define <2 x float> @extract_subreg_2f32_unpacked_nx2xf32(<vscale x 2 x float> %vec) nounwind {
+; CHECK-LABEL: extract_subreg_2f32_unpacked_nx2xf32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    xtn v0.2s, v0.2d
+; CHECK-NEXT:    ret
+  %vec.e0 = extractelement <vscale x 2 x float> %vec, i32 0
+  %vec.e1 = extractelement <vscale x 2 x float> %vec, i32 1
+
+  %1 = insertelement <2 x float> undef, float %vec.e0, i32 0
+  %2 = insertelement <2 x float> %1, float %vec.e1, i32 1
+  ret <2 x float> %2
+}
+
+define <4 x half> @extract_subreg_4f16_unpacked_nx4xf16(<vscale x 4 x half> %vec) nounwind {
+; CHECK-LABEL: extract_subreg_4f16_unpacked_nx4xf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    xtn v0.4h, v0.4s
+; CHECK-NEXT:    ret
+  %vec.e0 = extractelement <vscale x 4 x half> %vec, i32 0
+  %vec.e1 = extractelement <vscale x 4 x half> %vec, i32 1
+  %vec.e2 = extractelement <vscale x 4 x half> %vec, i32 2
+  %vec.e3 = extractelement <vscale x 4 x half> %vec, i32 3
+
+  %1 = insertelement <4 x half> undef, half %vec.e0, i32 0
+  %2 = insertelement <4 x half> %1, half %vec.e1, i32 1
+  %3 = insertelement <4 x half> %2, half %vec.e2, i32 2
+  %4 = insertelement <4 x half> %3, half %vec.e3, i32 3
+  ret <4 x half> %4
+}
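Not part of the patch: a minimal LLVM IR sketch of the node pattern the new AArch64 combine targets. The test above reaches the combine indirectly, since the insertelement/extractelement chain is folded into an EXTRACT_SUBVECTOR during DAG combining; the sketch below produces that node directly. It assumes the llvm.vector.extract intrinsic (named llvm.experimental.vector.extract in older releases), and the function name @extract_lo_v2f32 is hypothetical.

; Sketch only, not from the patch. Lowering the intrinsic call yields
;   v2f32 = extract_subvector nxv2f32 %vec, 0
; which performExtractSubvectorCombine rewrites before legalization as
;   v2f32 = bitcast (v2i32 extract_subvector (nxv2i32 bitcast %vec), 0)
define <2 x float> @extract_lo_v2f32(<vscale x 2 x float> %vec) {
  %lo = call <2 x float> @llvm.vector.extract.v2f32.nxv2f32(<vscale x 2 x float> %vec, i64 0)
  ret <2 x float> %lo
}

declare <2 x float> @llvm.vector.extract.v2f32.nxv2f32(<vscale x 2 x float>, i64)

Feeding this through the same RUN line as the new test (llc -mtriple=aarch64 -mattr=+sve) shows the resulting lowering for the unpacked nxv2f32 case.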