Index: lib/Target/AArch64/AArch64.td
===================================================================
--- lib/Target/AArch64/AArch64.td
+++ lib/Target/AArch64/AArch64.td
@@ -237,7 +237,8 @@
                                    FeatureNEON,
                                    FeaturePerfMon,
                                    FeaturePostRAScheduler,
-                                   FeaturePredictableSelectIsExpensive
+                                   FeaturePredictableSelectIsExpensive,
+                                   FeatureZCZeroing
                                    ]>;
 
 def ProcVulcan : SubtargetFeature<"vulcan", "ARMProcFamily", "Vulcan",
Index: lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.cpp
+++ lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -620,6 +620,18 @@
     return canBeExpandedToORR(MI, 32);
   case AArch64::MOVi64imm:
     return canBeExpandedToORR(MI, 64);
+
+  // Zeroing an FP register is as cheap as a move on subtargets that have the
+  // zero-cycle zeroing feature (FeatureZCZeroing), e.g. Kryo.
+  case AArch64::FMOVS0:
+  case AArch64::FMOVD0:
+    return Subtarget.hasZeroCycleZeroing();
+
+  // Likewise for a COPY whose operand 1 is the zero register (WZR/XZR).
+  case AArch64::COPY:
+    return (Subtarget.hasZeroCycleZeroing() &&
+            (MI.getOperand(1).getReg() == AArch64::WZR ||
+             MI.getOperand(1).getReg() == AArch64::XZR));
   }
 
   llvm_unreachable("Unknown opcode to check as cheap as a move!");
Index: test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
===================================================================
--- test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
+++ test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
@@ -1,44 +1,52 @@
-; RUN: llc -mtriple=arm64-apple-ios -mcpu=cyclone < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-apple-ios -mcpu=cyclone < %s | FileCheck %s --check-prefix=CYCLONE --check-prefix=ALL
+; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=kryo < %s | FileCheck %s --check-prefix=KRYO --check-prefix=ALL
+
 ; rdar://11481771
 ; rdar://13713797
 
 define void @t1() nounwind ssp {
 entry:
-; CHECK-LABEL: t1:
-; CHECK-NOT: fmov
-; CHECK: movi.2d v0, #0000000000000000
-; CHECK: movi.2d v1, #0000000000000000
-; CHECK: movi.2d v2, #0000000000000000
-; CHECK: movi.2d v3, #0000000000000000
+; ALL-LABEL: t1:
+; ALL-NOT: fmov
+; CYCLONE: movi.2d v0, #0000000000000000
+; CYCLONE: movi.2d v1, #0000000000000000
+; CYCLONE: movi.2d v2, #0000000000000000
+; CYCLONE: movi.2d v3, #0000000000000000
+; KRYO: movi v0.2d, #0000000000000000
+; KRYO: movi v1.2d, #0000000000000000
+; KRYO: movi v2.2d, #0000000000000000
+; KRYO: movi v3.2d, #0000000000000000
   tail call void @bar(double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00) nounwind
   ret void
 }
 
 define void @t2() nounwind ssp {
 entry:
-; CHECK-LABEL: t2:
-; CHECK-NOT: mov w0, wzr
-; CHECK: mov w0, #0
-; CHECK: mov w1, #0
+; ALL-LABEL: t2:
+; ALL-NOT: mov w0, wzr
+; ALL: mov w0, #0
+; ALL: mov w1, #0
   tail call void @bari(i32 0, i32 0) nounwind
   ret void
 }
 
 define void @t3() nounwind ssp {
 entry:
-; CHECK-LABEL: t3:
-; CHECK-NOT: mov x0, xzr
-; CHECK: mov x0, #0
-; CHECK: mov x1, #0
+; ALL-LABEL: t3:
+; ALL-NOT: mov x0, xzr
+; ALL: mov x0, #0
+; ALL: mov x1, #0
   tail call void @barl(i64 0, i64 0) nounwind
   ret void
 }
 
 define void @t4() nounwind ssp {
-; CHECK-LABEL: t4:
-; CHECK-NOT: fmov
-; CHECK: movi.2d v0, #0000000000000000
-; CHECK: movi.2d v1, #0000000000000000
+; ALL-LABEL: t4:
+; ALL-NOT: fmov
+; CYCLONE: movi.2d v0, #0000000000000000
+; CYCLONE: movi.2d v1, #0000000000000000
+; KRYO: movi v0.2d, #0000000000000000
+; KRYO: movi v1.2d, #0000000000000000
   tail call void @barf(float 0.000000e+00, float 0.000000e+00) nounwind
   ret void
 }
@@ -50,9 +58,9 @@
 
 ; We used to produce spills+reloads for a Q register with zero cycle zeroing
 ; enabled.
-; CHECK-LABEL: foo:
-; CHECK-NOT: str {{q[0-9]+}}
-; CHECK-NOT: ldr {{q[0-9]+}}
+; ALL-LABEL: foo:
+; ALL-NOT: str {{q[0-9]+}}
+; ALL-NOT: ldr {{q[0-9]+}}
 define double @foo(i32 %n) {
 entry:
   br label %for.body