Index: lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.td
+++ lib/Target/AArch64/AArch64InstrInfo.td
@@ -310,6 +310,17 @@
 
 //===----------------------------------------------------------------------===//
 
+// Any instruction that defines a 32-bit result leaves the high half of the
+// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
+// be copying from a truncate. But any other 32-bit operation will zero-extend
+// up to 64 bits.
+// FIXME: X86 also checks for CMOV here. Do we need something similar?
+def def32 : PatLeaf<(i32 GPR32:$src), [{
+  return N->getOpcode() != ISD::TRUNCATE &&
+         N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
+         N->getOpcode() != ISD::CopyFromReg;
+}]>;
+
 //===----------------------------------------------------------------------===//
 // Miscellaneous instructions.
 //===----------------------------------------------------------------------===//
@@ -645,6 +656,13 @@
 def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64:$R3),
           (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64:$R3)>;
 }
+// Avoid using SUBSXrx for a 32->64 zext if we can get it for free, since
+// its src regclass is GPR64sp, which prevents the folding of movs of
+// XZR.
+let AddedComplexity = 10 in {
+def : Pat<(sub GPR64:$Rn, (zext def32:$src)),
+          (SUBSXrr GPR64:$Rn, (SUBREG_TO_REG (i64 0), GPR32:$src, sub_32))>;
+}
 
 // Because of the immediate format for add/sub-imm instructions, the
 // expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
@@ -5301,17 +5319,6 @@
 //----------------------------------------------------------------------------
 // FIXME: Like for X86, these should go in their own separate .td file.
 
-// Any instruction that defines a 32-bit result leaves the high half of the
-// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
-// be copying from a truncate. But any other 32-bit operation will zero-extend
-// up to 64 bits.
-// FIXME: X86 also checks for CMOV here. Do we need something similar?
-def def32 : PatLeaf<(i32 GPR32:$src), [{
-  return N->getOpcode() != ISD::TRUNCATE &&
-         N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
-         N->getOpcode() != ISD::CopyFromReg;
-}]>;
-
 // In the case of a 32-bit def that is known to implicitly zero-extend,
 // we can use a SUBREG_TO_REG.
 def : Pat<(i64 (zext def32:$src)), (SUBREG_TO_REG (i64 0), GPR32:$src, sub_32)>;
Index: test/CodeGen/AArch64/addsub_ext.ll
===================================================================
--- test/CodeGen/AArch64/addsub_ext.ll
+++ test/CodeGen/AArch64/addsub_ext.ll
@@ -304,19 +304,20 @@
   ret void
 }
 
-define void @sub_i32rhs() minsize {
+define void @sub_i32rhs(i32 %in32) minsize {
 ; CHECK-LABEL: sub_i32rhs:
   %val32_tmp = load i32, i32* @var32
   %lhs64 = load i64, i64* @var64
 
   %val32 = add i32 %val32_tmp, 123
-  %rhs64_zext = zext i32 %val32 to i64
+  %rhs64_zext = zext i32 %in32 to i64
   %res64_zext = sub i64 %lhs64, %rhs64_zext
   store volatile i64 %res64_zext, i64* @var64
 ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw
 
-  %rhs64_zext_shift = shl i64 %rhs64_zext, 2
+  %rhs64_zext2 = zext i32 %val32 to i64
+  %rhs64_zext_shift = shl i64 %rhs64_zext2, 2
   %res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift
   store volatile i64 %res64_zext_shift, i64* @var64
 ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2
Index: test/CodeGen/AArch64/neg-zext.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/neg-zext.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
+; ISEL used to prefer to match the sub as SUBSXrx, which prevented
+; folding of mov WZR to form a neg instruction.
+
+define i64 @test1(i32 %x) {
+; CHECK-LABEL: test1:
+entry:
+  %m = and i32 %x, 3
+  %ext = zext i32 %m to i64
+; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
+; CHECK-NEXT: neg x0, x[[TMP]]
+; CHECK-NEXT: ret
+  %ret = sub i64 0, %ext
+  ret i64 %ret
+}
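
Note, not part of the patch: the new AddedComplexity pattern fires when the right-hand side of a 64-bit sub is a zero-extend of a 32-bit value that def32 already proves has a clear high half, so the extension can be expressed as a free SUBREG_TO_REG and the sub can use the register-register form (whose GPR64 operand class includes XZR, unlike the GPR64sp class required by SUBSXrx). A minimal, hypothetical IR example of that shape is sketched below; the function name and mask are illustrative, and the instruction choices in the comments describe the pattern's intent rather than verified llc output.

; Hypothetical sketch, not taken from the patch or the test suite.
define i64 @sub_of_zext(i64 %a, i32 %b) {
entry:
  %m = and i32 %b, 65535      ; 32-bit def other than a truncate or copy, so def32 matches
  %ext = zext i32 %m to i64   ; intended to fold to a free SUBREG_TO_REG
  %r = sub i64 %a, %ext       ; intended to select SUBSXrr rather than SUBSXrx
  ret i64 %r
}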