Index: llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -867,8 +867,15 @@
     Reg = N.getOperand(0);
 
     // Don't match if free 32-bit -> 64-bit zext can be used instead.
-    if (Ext == AArch64_AM::UXTW &&
-        Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*Reg.getNode()))
+    auto isDef32 = [](SDValue N) {
+      unsigned Opc = N.getOpcode();
+      return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
+             Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
+             Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
+             Opc != ISD::FREEZE;
+    };
+    if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
+        isDef32(Reg))
       return false;
   }
 
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -457,23 +457,6 @@
 
 } // end namespace AArch64ISD
 
-namespace {
-
-// Any instruction that defines a 32-bit result zeros out the high half of the
-// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
-// be copying from a truncate. But any other 32-bit operation will zero-extend
-// up to 64 bits. AssertSext/AssertZext aren't saying anything about the upper
-// 32 bits, they're probably just qualifying a CopyFromReg.
-static inline bool isDef32(const SDNode &N) {
-  unsigned Opc = N.getOpcode();
-  return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
-         Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
-         Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
-         Opc != ISD::FREEZE;
-}
-
-} // end anonymous namespace
-
 namespace AArch64 {
 /// Possible values of current rounding mode, which is specified in bits
 /// 23:22 of FPCR.
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -7259,14 +7259,6 @@
 //----------------------------------------------------------------------------
 // FIXME: Like for X86, these should go in their own separate .td file.
 
-def def32 : PatLeaf<(i32 GPR32:$src), [{
-  return isDef32(*N);
-}]>;
-
-// In the case of a 32-bit def that is known to implicitly zero-extend,
-// we can use a SUBREG_TO_REG.
-def : Pat<(i64 (zext def32:$src)), (SUBREG_TO_REG (i64 0), GPR32:$src, sub_32)>;
-
 // For an anyext, we don't care what the high bits are, so we can perform an
 // INSERT_SUBREF into an IMPLICIT_DEF.
 def : Pat<(i64 (anyext GPR32:$src)),
Index: llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -195,6 +195,29 @@
   });
 }
 
+// From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
+//
+// When you use the 32-bit form of an instruction, the upper 32 bits of the
+// source registers are ignored and the upper 32 bits of the destination
+// register are set to zero.
+//
+// If the 32-bit form of an AArch64 instruction defines the source operand of
+// the zero-extend, the zero-extend is redundant. Check that MI's opcode is a
+// real (target-specific) AArch64 instruction; if it is not, conservatively
+// do not process it.
+static bool isFreeZExtOfGPR32(MachineInstr *MI, MachineRegisterInfo *MRI) {
+  // A COPY from an FPR will become an FMOVSWr.
+  if (MI->getOpcode() == TargetOpcode::COPY &&
+      MI->getOperand(1).getReg().isVirtual()) {
+    const TargetRegisterClass *RC =
+        MRI->getRegClass(MI->getOperand(1).getReg());
+    if (RC == &AArch64::FPR32RegClass || RC == &AArch64::FPR64RegClass ||
+        RC == &AArch64::FPR128RegClass)
+      return true;
+  }
+  return MI->getOpcode() > TargetOpcode::GENERIC_OP_END;
+}
+
 bool AArch64MIPeepholeOpt::visitORR(
     MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved) {
   // Check this ORR comes from below zero-extend pattern.
@@ -208,20 +231,7 @@
     return false;
 
   MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
-  if (!SrcMI)
-    return false;
-
-  // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
-  //
-  // When you use the 32-bit form of an instruction, the upper 32 bits of the
-  // source registers are ignored and the upper 32 bits of the destination
-  // register are set to zero.
-  //
-  // If AArch64's 32-bit form of instruction defines the source operand of
-  // zero-extend, we do not need the zero-extend. Let's check the MI's opcode is
-  // real AArch64 instruction and if it is not, do not process the opcode
-  // conservatively.
-  if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
+  if (!SrcMI || !isFreeZExtOfGPR32(SrcMI, MRI))
     return false;
 
   Register DefReg = MI.getOperand(0).getReg();
Index: llvm/test/CodeGen/AArch64/arm64-popcnt.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64-popcnt.ll
+++ llvm/test/CodeGen/AArch64/arm64-popcnt.ll
@@ -35,8 +35,8 @@
 ; CHECK-LABEL: cnt32_advsimd_2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    fmov w0, s0
-; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    fmov d0, x8
 ; CHECK-NEXT:    cnt.8b v0, v0
 ; CHECK-NEXT:    uaddlv.8b h0, v0
 ; CHECK-NEXT:    fmov w0, s0
Index: llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll
+++ llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll
@@ -328,7 +328,8 @@
 define i64 @sign_extend_inreg_isdef32(i64) {
 ; CHECK-LABEL: sign_extend_inreg_isdef32:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    sbfx x0, x0, #32, #16
+; CHECK-NEXT:    sbfx x8, x0, #32, #16
+; CHECK-NEXT:    mov w0, w8
 ; CHECK-NEXT:    ret
   %2 = lshr i64 %0, 32
   %3 = shl i64 %2, 16
Index: llvm/test/CodeGen/AArch64/dp1.ll
===================================================================
--- llvm/test/CodeGen/AArch64/dp1.ll
+++ llvm/test/CodeGen/AArch64/dp1.ll
@@ -246,7 +246,6 @@
 ; CHECK-GISEL-NEXT:    cnt v0.8b, v0.8b
 ; CHECK-GISEL-NEXT:    uaddlv h0, v0.8b
 ; CHECK-GISEL-NEXT:    fmov w9, s0
-; CHECK-GISEL-NEXT:    mov w9, w9
 ; CHECK-GISEL-NEXT:    str x9, [x8]
 ; CHECK-GISEL-NEXT:    ret
   %val0_tmp = load i64, i64* @var64
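
Note on the ISA guarantee this patch relies on (an illustrative aside, not part
of the patch itself): any instruction writing a 32-bit W register zeroes bits
[63:32] of the corresponding X register, and that includes FMOVSWr, which is
why a COPY from an FPR32/FPR64/FPR128 class register is treated as a free
zero-extend above. A minimal C sketch; the function names are hypothetical and
the assembly in the comments is typical clang -O2 AArch64 codegen, shown only
for illustration:

  #include <stdint.h>
  #include <string.h>

  uint64_t add_zext(uint32_t a, uint32_t b) {
    // The 32-bit add ("add w0, w0, w1") already zeroes x0[63:32],
    // so the widening conversion below costs no extra instruction.
    return (uint64_t)(a + b);
  }

  uint64_t bitcast_zext(float f) {
    // The float-to-GPR move lowers to "fmov w0, s0" (FMOVSWr), which
    // likewise zeroes the upper 32 bits, so the zext to i64 is free.
    uint32_t u;
    memcpy(&u, &f, sizeof u);
    return (uint64_t)u;
  }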