diff --git a/llvm/include/llvm/Target/GlobalISel/Target.td b/llvm/include/llvm/Target/GlobalISel/Target.td
--- a/llvm/include/llvm/Target/GlobalISel/Target.td
+++ b/llvm/include/llvm/Target/GlobalISel/Target.td
@@ -22,6 +22,8 @@
 
 def s32 : LLT;
 def s64 : LLT;
+def v2s32 : LLT;
+def v4s16 : LLT;
 
 // Defines a matcher for complex operands. This is analogous to ComplexPattern
 // from SelectionDAG.
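Note on the two new LLT records (annotation, not part of the patch): `GIComplexOperandMatcher`, used in the AArch64InstrFormats.td hunk below, names its operand type by referring to one of these TableGen LLT definitions, and `v2s32`/`v4s16` did not exist yet. They are the GlobalISel counterparts of the `v2f32` and `v4f16` source operands of the fcvtl2 patterns; GlobalISel's low-level types track only lane count and size, not float versus integer.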
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -420,7 +420,6 @@
   bool tryBitfieldInsertOp(SDNode *N);
   bool tryBitfieldInsertInZeroOp(SDNode *N);
   bool tryShiftAmountMod(SDNode *N);
-  bool tryHighFPExt(SDNode *N);
 
   bool tryReadRegister(SDNode *N);
   bool tryWriteRegister(SDNode *N);
@@ -2470,35 +2469,6 @@
   return true;
 }
 
-/// Try to form fcvtl2 instructions from a floating-point extend of a high-half
-/// extract of a subvector.
-bool AArch64DAGToDAGISel::tryHighFPExt(SDNode *N) {
-  assert(N->getOpcode() == ISD::FP_EXTEND);
-
-  // There are 2 forms of fcvtl2 - extend to double or extend to float.
-  SDValue Extract = N->getOperand(0);
-  EVT VT = N->getValueType(0);
-  EVT NarrowVT = Extract.getValueType();
-  if ((VT != MVT::v2f64 || NarrowVT != MVT::v2f32) &&
-      (VT != MVT::v4f32 || NarrowVT != MVT::v4f16))
-    return false;
-
-  // Optionally look past a bitcast.
-  Extract = peekThroughBitcasts(Extract);
-  if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
-    return false;
-
-  // Match extract from start of high half index.
-  // Example: v8i16 -> v4i16 means the extract must begin at index 4.
-  unsigned ExtractIndex = Extract.getConstantOperandVal(1);
-  if (ExtractIndex != Extract.getValueType().getVectorNumElements())
-    return false;
-
-  auto Opcode = VT == MVT::v2f64 ? AArch64::FCVTLv4i32 : AArch64::FCVTLv8i16;
-  CurDAG->SelectNodeTo(N, Opcode, VT, Extract.getOperand(0));
-  return true;
-}
-
 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
                                 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
                                 unsigned NumberOfIgnoredLowBits = 0,
@@ -4272,11 +4242,6 @@
       return;
     break;
 
-  case ISD::FP_EXTEND:
-    if (tryHighFPExt(Node))
-      return;
-    break;
-
   case ISD::OR:
     if (tryBitfieldInsertOp(Node))
       return;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -133,6 +133,18 @@
 def extract_high_v2i64 :
    ComplexPattern<v1i64, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;
 
+def extract_high_v8f16 :
+   ComplexPattern<v4f16, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;
+def extract_high_v4f32 :
+   ComplexPattern<v2f32, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;
+
+def gi_extract_high_v8f16 :
+   GIComplexOperandMatcher<v4s16, "selectExtractHigh">,
+   GIComplexPatternEquiv<extract_high_v8f16>;
+def gi_extract_high_v4f32 :
+   GIComplexOperandMatcher<v2s32, "selectExtractHigh">,
+   GIComplexPatternEquiv<extract_high_v4f32>;
+
 def extract_high_dup_v8i16 :
    BinOpFrag<(extract_subvector (v8i16 (AArch64duplane16 (v8i16 node:$LHS), node:$RHS)), (i64 4))>;
 def extract_high_dup_v4i32 :
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4690,11 +4690,16 @@
 def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))),
           (FCVTLv4i16 V64:$Rn)>;
 def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn),
-                                                                (i64 4)))),
+                                                                 (i64 4)))),
+          (FCVTLv8i16 V128:$Rn)>;
+def : Pat<(v2f64 (any_fpextend (v2f32 V64:$Rn))),
+          (FCVTLv2i32 V64:$Rn)>;
+def : Pat<(v2f64 (any_fpextend (v2f32 (extract_high_v4f32 (v4f32 V128:$Rn))))),
+          (FCVTLv4i32 V128:$Rn)>;
+def : Pat<(v4f32 (any_fpextend (v4f16 V64:$Rn))),
+          (FCVTLv4i16 V64:$Rn)>;
+def : Pat<(v4f32 (any_fpextend (v4f16 (extract_high_v8f16 (v8f16 V128:$Rn))))),
           (FCVTLv8i16 V128:$Rn)>;
-def : Pat<(v2f64 (any_fpextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>;
-
-def : Pat<(v4f32 (any_fpextend (v4f16 V64:$Rn))), (FCVTLv4i16 V64:$Rn)>;
 
 defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>;
 defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>;
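To illustrate what the new `extract_high_v4f32` pattern matches, here is a small hand-written example (hypothetical, in the style of the tests below; function and value names invented). Previously the high half had to be moved down with an `ext`/`mov` before a `fcvtl`; with these patterns the extend should select directly to `fcvtl2`:

    ; llc -mtriple=aarch64 fcvtl2-example.ll
    define <2 x double> @fpext_high(<4 x float> %v) {
      ; Lanes 2 and 3, i.e. the high 64 bits of %v.
      %hi = shufflevector <4 x float> %v, <4 x float> poison, <2 x i32> <i32 2, i32 3>
      ; Expected selection with this patch: fcvtl2 v0.2d, v0.4s
      %w = fpext <2 x float> %hi to <2 x double>
      ret <2 x double> %w
    }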
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -436,6 +436,8 @@
 
   ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
 
+  ComplexRendererFns selectExtractHigh(MachineOperand &Root) const;
+
   void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
                       int OpIdx = -1) const;
   void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
@@ -6877,6 +6879,23 @@
   }}};
 }
 
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {
+  if (!Root.isReg())
+    return std::nullopt;
+  MachineRegisterInfo &MRI =
+      Root.getParent()->getParent()->getParent()->getRegInfo();
+
+  MachineInstr *Extract = getDefIgnoringCopies(Root.getReg(), MRI);
+  if (Extract && Extract->getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+      Root.getReg() == Extract->getOperand(1).getReg()) {
+    Register ExtReg = Extract->getOperand(2).getReg();
+    return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
+  }
+
+  return std::nullopt;
+}
+
 void
 AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
                                            const MachineInstr &MI,
                                            int OpIdx) const {
diff --git a/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll b/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll
--- a/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll
@@ -74,11 +74,10 @@
 define <4 x i64> @fptosi_v4i64_v4f32(<4 x float> %x) #0 {
 ; CHECK-LABEL: fptosi_v4i64_v4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    fcvtl2 v1.2d, v0.4s
 ; CHECK-NEXT:    fcvtl v0.2d, v0.2s
-; CHECK-NEXT:    fcvtl v1.2d, v1.2s
-; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
 ; CHECK-NEXT:    fcvtzs v1.2d, v1.2d
+; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
 ; CHECK-NEXT:    ret
   %val = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0
   ret <4 x i64> %val
@@ -87,11 +86,10 @@
 define <4 x i64> @fptoui_v4i64_v4f32(<4 x float> %x) #0 {
 ; CHECK-LABEL: fptoui_v4i64_v4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    fcvtl2 v1.2d, v0.4s
 ; CHECK-NEXT:    fcvtl v0.2d, v0.2s
-; CHECK-NEXT:    fcvtl v1.2d, v1.2s
-; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
 ; CHECK-NEXT:    fcvtzu v1.2d, v1.2d
+; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
 ; CHECK-NEXT:    ret
   %val = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0
   ret <4 x i64> %val
diff --git a/llvm/test/CodeGen/AArch64/fpext.ll b/llvm/test/CodeGen/AArch64/fpext.ll
--- a/llvm/test/CodeGen/AArch64/fpext.ll
+++ b/llvm/test/CodeGen/AArch64/fpext.ll
@@ -75,9 +75,9 @@
 ;
 ; CHECK-GI-LABEL: fpext_v4f32_v4f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    fcvtl v0.2d, v0.2s
-; CHECK-GI-NEXT:    fcvtl v1.2d, v1.2s
+; CHECK-GI-NEXT:    fcvtl v2.2d, v0.2s
+; CHECK-GI-NEXT:    fcvtl2 v1.2d, v0.4s
+; CHECK-GI-NEXT:    mov v0.16b, v2.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = fpext <4 x float> %a to <4 x double>
@@ -224,9 +224,9 @@
 ;
 ; CHECK-GI-LABEL: fpext_v8f16_v8f32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov d1, v0.d[1]
-; CHECK-GI-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-GI-NEXT:    fcvtl v1.4s, v1.4h
+; CHECK-GI-NEXT:    fcvtl v2.4s, v0.4h
+; CHECK-GI-NEXT:    fcvtl2 v1.4s, v0.8h
+; CHECK-GI-NEXT:    mov v0.16b, v2.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = fpext <8 x half> %a to <8 x float>
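On the GlobalISel path the same fold is driven by `selectExtractHigh`: after legalization, a fpext of a 128-bit vector is split in half, and the high half appears as the second result of a `G_UNMERGE_VALUES`. The matcher fires only when the root register is that second definition, and then renders the wide source register, which lets the `extract_high` patterns above apply. A hedged sketch of the generic MIR for the `fpext_v8f16_v8f32` test above (register names invented):

    %lo:_(<4 x s16>), %hi:_(<4 x s16>) = G_UNMERGE_VALUES %v:_(<8 x s16>)
    %a:_(<4 x s32>) = G_FPEXT %lo(<4 x s16>)   ; selects to: fcvtl  v2.4s, v0.4h
    %b:_(<4 x s32>) = G_FPEXT %hi(<4 x s16>)   ; selects to: fcvtl2 v1.4s, v0.8h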