Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -21485,10 +21485,13 @@
   // Try to move vector bitcast after extract_subv by scaling extraction index:
   // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
   if (V.getOpcode() == ISD::BITCAST &&
-      V.getOperand(0).getValueType().isVector() &&
+      V.getOperand(0).getValueType().isFixedLengthVector() &&
       (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
     SDValue SrcOp = V.getOperand(0);
     EVT SrcVT = SrcOp.getValueType();
+    // Scalable vectors are excluded above: a target combine may purposely add
+    // these bitcasts so that extract_subvector is done on integer types, so
+    // we must not reorder those particular bitcasts.
     unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
     unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
     if ((SrcNumElts % DestNumElts) == 0) {
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14928,11 +14928,32 @@
 static SDValue
 performExtractSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
                                SelectionDAG &DAG) {
+  EVT InVT = N->getOperand(0).getValueType();
+  EVT OutVT = N->getValueType(0);
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  SDLoc DL(N);
+  // Reorder when the scalable vector's inner type is floating point and the
+  // outer type is not scalable vector. Also, the index should be 0 and all the
+  // input and output types should be legal to deal with.
+  if (InVT.isScalableVector() && InVT.isFloatingPoint() &&
+      DCI.isBeforeLegalize() && !OutVT.isScalableVector() &&
+      isNullConstant(N->getOperand(1)) && TLI.isTypeLegal(OutVT) &&
+      TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, InVT)) {
+    // Bitcast the input
+    SDValue VecOp = N->getOperand(0);
+    VecOp = DAG.getNode(ISD::BITCAST, DL, InVT.changeTypeToInteger(), VecOp);
+    // Perform extract in integer type
+    SDValue Extract =
+        DAG.getNode(N->getOpcode(), DL, OutVT.changeTypeToInteger(), VecOp,
+                    N->getOperand(1));
+    // Bitcast back to fp type
+    return DAG.getNode(ISD::BITCAST, DL, OutVT, Extract);
+  }
+
   if (DCI.isBeforeLegalizeOps())
     return SDValue();
 
-  EVT VT = N->getValueType(0);
-  if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1)
+  if (!OutVT.isScalableVector() || OutVT.getVectorElementType() != MVT::i1)
     return SDValue();
 
   SDValue V = N->getOperand(0);
@@ -14943,7 +14964,7 @@
   // ty1 extract_vector(ty2 splat(const))) -> ty1 splat(const)
   if (V.getOpcode() == ISD::SPLAT_VECTOR)
     if (isa<ConstantSDNode>(V.getOperand(0)))
-      return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V.getOperand(0));
+      return DAG.getNode(ISD::SPLAT_VECTOR, DL, OutVT, V.getOperand(0));
 
   return SDValue();
 }
Index: llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll
+++ llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll
@@ -1,11 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -mattr=+sve < %s | FileCheck %s
 
 target triple = "aarch64-unknown-linux-gnu"
 
 ; == Matching first N elements ==
 
-define <4 x i1> @reshuffle_v4i1_nxv4i1(<vscale x 4 x i1> %a) #0 {
+define <4 x i1> @reshuffle_v4i1_nxv4i1(<vscale x 4 x i1> %a) {
 ; CHECK-LABEL: reshuffle_v4i1_nxv4i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z1.s, p0/z, #1 // =0x1
@@ -29,4 +29,34 @@
   ret <4 x i1> %v3
 }
 
-attributes #0 = { "target-features"="+sve" }
+; Extract from unpacked SVE vectors into different sizes of NEON registers.
+
+define <2 x float> @extract_subreg_2f32_unpacked_nx2xf32(<vscale x 2 x float> %vec) nounwind {
+; CHECK-LABEL: extract_subreg_2f32_unpacked_nx2xf32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    xtn v0.2s, v0.2d
+; CHECK-NEXT:    ret
+  %vec.e0 = extractelement <vscale x 2 x float> %vec, i32 0
+  %vec.e1 = extractelement <vscale x 2 x float> %vec, i32 1
+  ; Rebuild lanes 0-1 as a fixed-length <2 x float>; the expected output is a single xtn.
+  %1 = insertelement <2 x float> undef, float %vec.e0, i32 0
+  %2 = insertelement <2 x float> %1, float %vec.e1, i32 1
+  ret <2 x float> %2
+}
+
+define <4 x half> @extract_subreg_4f16_unpacked_nx4xf16(<vscale x 4 x half> %vec) nounwind {
+; CHECK-LABEL: extract_subreg_4f16_unpacked_nx4xf16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    xtn v0.4h, v0.4s
+; CHECK-NEXT:    ret
+  %vec.e0 = extractelement <vscale x 4 x half> %vec, i32 0
+  %vec.e1 = extractelement <vscale x 4 x half> %vec, i32 1
+  %vec.e2 = extractelement <vscale x 4 x half> %vec, i32 2
+  %vec.e3 = extractelement <vscale x 4 x half> %vec, i32 3
+  ; Rebuild lanes 0-3 as a fixed-length <4 x half>; the expected output is a single xtn.
+  %1 = insertelement <4 x half> undef, half %vec.e0, i32 0
+  %2 = insertelement <4 x half> %1, half %vec.e1, i32 1
+  %3 = insertelement <4 x half> %2, half %vec.e2, i32 2
+  %4 = insertelement <4 x half> %3, half %vec.e3, i32 3
+  ret <4 x half> %4
+}