Index: lib/Target/AArch64/AArch64.td
===================================================================
--- lib/Target/AArch64/AArch64.td
+++ lib/Target/AArch64/AArch64.td
@@ -49,6 +49,14 @@
 def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
                                         "Has zero-cycle zeroing instructions">;
 
+/// Similar to Cyclone, Kryo favors using immediate #0 to zero out registers.
+/// However, Cyclone's approach to zeroing a D or S register (movi v.2d +
+/// extract subreg) performs poorly on Kryo; Kryo prefers using movi D #0 to
+/// zero out the D register directly.
+def FeatureImmeZeroing
+    : SubtargetFeature<"immez", "HasImmeZeroing", "true",
+                       "Use immediate #0 to zero a register">;
+
 def FeatureStrictAlign : SubtargetFeature<"strict-align", "StrictAlign",
                                           "true",
                                           "Disallow all unaligned memory "
@@ -139,7 +147,8 @@
                                    FeatureNEON,
                                    FeatureCrypto,
                                    FeatureCRC,
-                                   FeaturePerfMon]>;
+                                   FeaturePerfMon,
+                                   FeatureImmeZeroing]>;
 
 def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8,
                                                FeatureNEON,
Index: lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.cpp
+++ lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1928,7 +1928,8 @@
           .addImm(0)
           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
     }
-  } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
+  } else if (SrcReg == AArch64::WZR && (Subtarget.hasZeroCycleZeroing() ||
+                                        Subtarget.hasImmeZeroing())) {
     BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg).addImm(0).addImm(
         AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
   } else {
@@ -1964,7 +1965,8 @@
         .addReg(SrcReg, getKillRegState(KillSrc))
         .addImm(0)
         .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
-  } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
+  } else if (SrcReg == AArch64::XZR && (Subtarget.hasZeroCycleZeroing() ||
+                                        Subtarget.hasImmeZeroing())) {
     BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg).addImm(0).addImm(
         AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
   } else {
Index: lib/Target/AArch64/AArch64Subtarget.h
===================================================================
--- lib/Target/AArch64/AArch64Subtarget.h
+++ lib/Target/AArch64/AArch64Subtarget.h
@@ -64,6 +64,9 @@
   // HasZeroCycleZeroing - Has zero-cycle zeroing instructions.
   bool HasZeroCycleZeroing;
 
+  // HasImmeZeroing - Use immediate #0 to zero a register.
+  bool HasImmeZeroing;
+
   // StrictAlign - Disallow unaligned memory accesses.
   bool StrictAlign;
 
@@ -133,6 +136,8 @@
 
   bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; }
 
+  bool hasImmeZeroing() const { return HasImmeZeroing; }
+
   bool requiresStrictAlign() const { return StrictAlign; }
 
   bool isX18Reserved() const { return ReserveX18; }
Index: lib/Target/AArch64/AArch64Subtarget.cpp
===================================================================
--- lib/Target/AArch64/AArch64Subtarget.cpp
+++ lib/Target/AArch64/AArch64Subtarget.cpp
@@ -53,8 +53,8 @@
       HasV8_1aOps(false), HasV8_2aOps(false), HasFPARMv8(false), HasNEON(false),
       HasCrypto(false), HasCRC(false), HasPerfMon(false), HasFullFP16(false),
       HasZeroCycleRegMove(false), HasZeroCycleZeroing(false),
-      StrictAlign(false), ReserveX18(TT.isOSDarwin()), IsLittle(LittleEndian),
-      CPUString(CPU), TargetTriple(TT), FrameLowering(),
+      HasImmeZeroing(false), StrictAlign(false), ReserveX18(TT.isOSDarwin()),
+      IsLittle(LittleEndian), CPUString(CPU), TargetTriple(TT), FrameLowering(),
       InstrInfo(initializeSubtargetDependencies(FS)), TSInfo(),
       TLInfo(TM, *this), GISel() {}
Index: test/CodeGen/AArch64/imme_zeroing.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/imme_zeroing.ll
@@ -0,0 +1,45 @@
+; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=kryo < %s | FileCheck %s
+
+; Verify that immediate #0 is used when zeroing a register.
+
+define i32 @test_int32(i32* nocapture readonly %p) {
+; CHECK-LABEL: test_int32:
+; CHECK: movz x9, #0
+; CHECK: movz w8, #0
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret i32 %add
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %sum.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %p, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %sum.06
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 10
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+define i64 @test_int64(i64* nocapture readonly %p) {
+; CHECK-LABEL: test_int64:
+; CHECK: movz x9, #0
+; CHECK: movz x8, #0
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret i64 %add
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.07 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %sum.06 = phi i64 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i64, i64* %p, i64 %i.07
+  %0 = load i64, i64* %arrayidx, align 8
+  %add = add nsw i64 %0, %sum.06
+  %inc = add nuw nsw i64 %i.07, 1
+  %exitcond = icmp eq i64 %inc, 10
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
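
Note for trying the patch locally: because the feature string is registered as
SubtargetFeature<"immez", ...>, it can be enabled directly with -mattr=+immez,
independent of -mcpu=kryo. A minimal sketch of such a check follows; it is
illustrative only and not part of the patch (the function name is made up, and
the CHECK line assumes the same movz spelling the test above expects):

  ; RUN: llc -mtriple=aarch64-gnu-linux -mattr=+immez < %s | FileCheck %s

  ; Returning a zero constant copies out of WZR, so with +immez the
  ; copyPhysReg change above should emit "movz w0, #0" instead of
  ; "mov w0, wzr".
  define i32 @ret_zero() {
  ; CHECK-LABEL: ret_zero:
  ; CHECK: movz w0, #0
  entry:
    ret i32 0
  }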