diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -371,101 +371,104 @@
     return AdjustCost(
         BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
 
-  static const TypeConversionCostTblEntry
-  ConversionTbl[] = {
-    { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
-    { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
-    { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
-    { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
-
-    // The number of shll instructions for the extension.
-    { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
-    { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
-    { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
-    { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
-    { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
-    { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
-    { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
-    { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
-    { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
-    { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
-    { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
-    { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
-    { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
-    { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
-    { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
-    { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
-
-    // LowerVectorINT_TO_FP:
-    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
-    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
-    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
-    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
-    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
-    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
-
-    // Complex: to v2f32
-    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
-    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
-    { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
-    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
-    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
-    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
-
-    // Complex: to v4f32
-    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 4 },
-    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
-    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
-    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
-
-    // Complex: to v8f32
-    { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
-    { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
-    { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
-    { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
-
-    // Complex: to v16f32
-    { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
-    { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
-
-    // Complex: to v2f64
-    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
-    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
-    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
-    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
-    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
-    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
-
-
-    // LowerVectorFP_TO_INT
-    { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 },
-    { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
-    { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
-    { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
-    { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
-    { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
-
-    // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext).
-    { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2 },
-    { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 1 },
-    { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 1 },
-    { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2 },
-    { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 1 },
-    { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 1 },
-
-    // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2
-    { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
-    { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 2 },
-    { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
-    { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 2 },
-
-    // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
-    { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
-    { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
-    { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 2 },
-    { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
-    { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
-    { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 },
+  static const TypeConversionCostTblEntry ConversionTbl[] = {
+      {ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1},
+      {ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0},
+      {ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3},
+      {ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6},
+
+      // The number of shll instructions for the extension.
+      {ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3},
+      {ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3},
+      {ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2},
+      {ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2},
+      {ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3},
+      {ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3},
+      {ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2},
+      {ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2},
+      {ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7},
+      {ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7},
+      {ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6},
+      {ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6},
+      {ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2},
+      {ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2},
+      {ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6},
+      {ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6},
+
+      // LowerVectorINT_TO_FP:
+      {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1},
+      {ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1},
+      {ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1},
+      {ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1},
+      {ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1},
+      {ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1},
+
+      // Complex: to v2f32
+      {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3},
+      {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3},
+      {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2},
+      {ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3},
+      {ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3},
+      {ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2},
+
+      // Complex: to v4f32
+      {ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 4},
+      {ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2},
+      {ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3},
+      {ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2},
+
+      // Complex: to v8f32
+      {ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 10},
+      {ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4},
+      {ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10},
+      {ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4},
+
+      // Complex: to v16f32
+      {ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 21},
+      {ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 21},
+
+      // Complex: to v2f64
+      {ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4},
+      {ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4},
+      {ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2},
+      {ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4},
+      {ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4},
+      {ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2},
+
+      // LowerVectorFP_TO_INT
+      {ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1},
+      {ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1},
+      {ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1},
+      {ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1},
+      {ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1},
+      {ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1},
+
+      // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext).
+      {ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2},
+      {ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 1},
+      {ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 1},
+      {ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2},
+      {ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 1},
+      {ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 1},
+
+      // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2
+      {ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2},
+      {ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 2},
+      {ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2},
+      {ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 2},
+
+      // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
+      {ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2},
+      {ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2},
+      {ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 2},
+      {ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2},
+      {ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2},
+      {ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2},
+
+      // Truncate from nxvmf64 to nxvmf32.
+      {ISD::FP_ROUND, MVT::nxv2f32, MVT::nxv2f64, 1},
+      {ISD::FP_ROUND, MVT::nxv4f32, MVT::nxv4f64, 1},
+      {ISD::FP_ROUND, MVT::nxv8f32, MVT::nxv8f64, 1},
   };
 
   if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
diff --git a/llvm/test/CodeGen/AArch64/fptrunc-cost.ll b/llvm/test/CodeGen/AArch64/fptrunc-cost.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fptrunc-cost.ll
@@ -0,0 +1,109 @@
+; RUN: opt -debug-only=loop-vectorize -loop-vectorize -mtriple aarch64-linux-gnu -mattr=+sve -S -o - < %s 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @f64_to_f32_vf2(float* nocapture %a, double* nocapture readonly %b, i64 %n) {
+; CHECK: LV: Found an estimated cost of 1 for VF vscale x 2 For instruction: %conv = fptrunc double %sub to float
+; CHECK: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %conv = fptrunc double %sub to float
+; CHECK: LV: Found an estimated cost of 1 for VF vscale x 8 For instruction: %conv = fptrunc double %sub to float
+
+; CHECK-LABEL: @f64_to_f32_vf2
+; CHECK: vector.body:
+entry:
+  %cmp6 = icmp sgt i64 %n, 0
+  br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %i.07 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds double, double* %b, i64 %i.07
+  %0 = load double, double* %arrayidx, align 8
+  %sub = fadd double %0, -1.000000e+00
+  %conv = fptrunc double %sub to float
+  %arrayidx1 = getelementptr inbounds float, float* %a, i64 %i.07
+  store float %conv, float* %arrayidx1, align 4
+  %inc = add nuw nsw i64 %i.07, 1
+  %exitcond.not = icmp eq i64 %inc, %n
+  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body, !llvm.loop !20
+}
+
+define void @f64_to_f32_vf4(float* nocapture %a, double* nocapture readonly %b, i64 %n) {
+; CHECK-LABEL: @f64_to_f32_vf4
+; CHECK: vector.body:
+entry:
+  %cmp6 = icmp sgt i64 %n, 0
+  br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %i.07 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds double, double* %b, i64 %i.07
+  %0 = load double, double* %arrayidx, align 8
+  %sub = fadd double %0, -1.000000e+00
+  %conv = fptrunc double %sub to float
+  %arrayidx1 = getelementptr inbounds float, float* %a, i64 %i.07
+  store float %conv, float* %arrayidx1, align 4
+  %inc = add nuw nsw i64 %i.07, 1
+  %exitcond.not = icmp eq i64 %inc, %n
+  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body, !llvm.loop !21
+}
+
+define void @f64_to_f32_vf8(float* nocapture %a, double* nocapture readonly %b, i64 %n) {
+; CHECK-LABEL: @f64_to_f32_vf8
+; CHECK: vector.body:
+entry:
+  %cmp6 = icmp sgt i64 %n, 0
+  br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  ret void
+
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %i.07 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds double, double* %b, i64 %i.07
+  %0 = load double, double* %arrayidx, align 8
+  %sub = fadd double %0, -1.000000e+00
+  %conv = fptrunc double %sub to float
+  %arrayidx1 = getelementptr inbounds float, float* %a, i64 %i.07
+  store float %conv, float* %arrayidx1, align 4
+  %inc = add nuw nsw i64 %i.07, 1
+  %exitcond.not = icmp eq i64 %inc, %n
+  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body, !llvm.loop !22
+}
+
+!13 = !{!"llvm.loop.mustprogress"}
+!14 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
+!15 = !{!"llvm.loop.interleave.count", i32 1}
+!16 = !{!"llvm.loop.vectorize.enable", i1 true}
+!17 = !{!"llvm.loop.vectorize.width", i32 2}
+!18 = !{!"llvm.loop.vectorize.width", i32 4}
+!19 = !{!"llvm.loop.vectorize.width", i32 8}
+!20 = distinct !{!20, !13, !14, !15, !16, !17} ; VF vscale x 2
+!21 = distinct !{!21, !13, !14, !15, !16, !18} ; VF vscale x 4
+!22 = distinct !{!22, !13, !14, !15, !16, !19} ; VF vscale x 8
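
For reference, the new scalable-vector FP_ROUND entries can also be observed outside the loop vectorizer with the standalone cost-model printer. The snippet below is an illustrative sketch and not part of this patch: the function name is made up, and it assumes an opt binary that provides the new-pass-manager print<cost-model> pass. With this change it should report a cost of 1 for the fptrunc, matching the {ISD::FP_ROUND, MVT::nxv2f32, MVT::nxv2f64, 1} table entry.

; RUN: opt -mtriple=aarch64-linux-gnu -mattr=+sve -passes="print<cost-model>" -disable-output %s 2>&1 | FileCheck %s

define <vscale x 2 x float> @fptrunc_nxv2f64_to_nxv2f32(<vscale x 2 x double> %v) {
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %r = fptrunc
  %r = fptrunc <vscale x 2 x double> %v to <vscale x 2 x float>
  ret <vscale x 2 x float> %r
}

This is the same table lookup the loop vectorizer consults when costing the fptrunc in the loops above, which is what the three per-VF CHECK lines in fptrunc-cost.ll verify.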