Index: lib/Target/AArch64/AArch64.td
===================================================================
--- lib/Target/AArch64/AArch64.td
+++ lib/Target/AArch64/AArch64.td
@@ -237,7 +237,8 @@
                                    FeatureNEON,
                                    FeaturePerfMon,
                                    FeaturePostRAScheduler,
-                                   FeaturePredictableSelectIsExpensive
+                                   FeaturePredictableSelectIsExpensive,
+                                   FeatureZCZeroing
                                    ]>;
 
 def ProcVulcan : SubtargetFeature<"vulcan", "ARMProcFamily", "Vulcan",
Index: lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.cpp
+++ lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -620,6 +620,17 @@
     return canBeExpandedToORR(MI, 32);
   case AArch64::MOVi64imm:
     return canBeExpandedToORR(MI, 64);
+
+  // Zeroing an FP register is as cheap as a move when the subtarget has the
+  // zero-cycle-zeroing feature; query the feature bit, not the CPU family.
+  case AArch64::FMOVS0:
+  case AArch64::FMOVD0:
+    return Subtarget.hasZeroCycleZeroing();
+  // A COPY whose source is WZR or XZR zeroes a GPR, so it qualifies as well.
+  case TargetOpcode::COPY:
+    return (Subtarget.hasZeroCycleZeroing() &&
+            (MI.getOperand(1).getReg() == AArch64::WZR ||
+             MI.getOperand(1).getReg() == AArch64::XZR));
   }
 
   llvm_unreachable("Unknown opcode to check as cheap as a move!");
Index: test/CodeGen/AArch64/kryo_zeroing.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/kryo_zeroing.ll
@@ -0,0 +1,87 @@
+; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=kryo < %s | FileCheck %s
+
+; Verify that a zeroing instruction (mov #0 / movi #0) is emitted when zeroing an S, D, X or W register.
+
+define i32 @test_int32(i32* nocapture readonly %p) {
+; CHECK-LABEL: test_int32:
+; CHECK: mov x9, #0
+; CHECK: mov w8, #0
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret i32 %add
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %sum.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %p, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %sum.06
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 10
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+define i64 @test_int64(i64* nocapture readonly %p) {
+; CHECK-LABEL: test_int64:
+; CHECK: mov x9, #0
+; CHECK: mov x8, #0
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret i64 %add
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.07 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %sum.06 = phi i64 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i64, i64* %p, i64 %i.07
+  %0 = load i64, i64* %arrayidx, align 8
+  %add = add nsw i64 %0, %sum.06
+  %inc = add nuw nsw i64 %i.07, 1
+  %exitcond = icmp eq i64 %inc, 10
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+define double @test_double(double* nocapture readonly %p) {
+; CHECK-LABEL: test_double:
+; CHECK: movi v0.2d, #0000000000000000
+; CHECK: mov x8, #0
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret double %add
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %sum.06 = phi double [ 0.0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds double, double* %p, i64 %indvars.iv
+  %0 = load double, double* %arrayidx, align 4
+  %add = fadd double %0, %sum.06
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 10
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+define float @test_float(float* nocapture readonly %p) {
+; CHECK-LABEL: test_float:
+; CHECK: movi v0.2d, #0000000000000000
+; CHECK: mov x8, #0
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret float %add
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %sum.06 = phi float [ 0.0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds float, float* %p, i64 %indvars.iv
+  %0 = load float, float* %arrayidx, align 4
+  %add = fadd float %0, %sum.06
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 10
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}