Retunes X86 conversion-cost table entries (UINT_TO_FP / SINT_TO_FP, plus
FP_EXTEND / FP_ROUND) and updates the affected CostModel and LoopVectorize
FileCheck expectations.

NOTE: line breaks and indentation in this patch were reconstructed from a
whitespace-collapsed copy; the original column alignment inside the tables is
not preserved. Apply with whitespace-insensitive matching, e.g.
`patch -p0 -l < this.patch`.

Index: lib/Target/X86/X86TargetTransformInfo.cpp
===================================================================
--- lib/Target/X86/X86TargetTransformInfo.cpp
+++ lib/Target/X86/X86TargetTransformInfo.cpp
@@ -577,6 +577,8 @@
     { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 },
     { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 },
     { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 },
+    { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 26 },
+    { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 26 },
 
     { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 },
     { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 },
@@ -591,11 +593,13 @@
     { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 },
     { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 },
     { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 2 },
+    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 },
     { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
     { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 },
     { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 },
     { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 },
     { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 },
+    { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 5 },
     { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 5 },
     { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 12 },
     { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 26 },
@@ -685,6 +689,7 @@
     { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
     { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 },
     { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 },
+    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 6 },
     { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 6 },
     { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 6 },
     { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 9 },
@@ -693,9 +698,10 @@
     // here. We have roughly 10 instructions per scalar element.
     // Multiply that by the vector width.
     // FIXME: remove that when PR19268 is fixed.
-    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
-    { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 4*10 },
-
+    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 10 },
+    { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 20 },
+    { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 13 },
+
     { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 },
     { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 7 },
     // This node is expanded into scalarized operations but BasicTTI is overly
@@ -705,6 +711,9 @@
     // should be factored in too. Inflating the cost per element by 1.
     { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 8*4 },
     { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 4*4 },
+
+    { ISD::FP_EXTEND, MVT::v4f64, MVT::v4f32, 1 },
+    { ISD::FP_ROUND, MVT::v4f32, MVT::v4f64, 1 },
   };
 
   static const TypeConversionCostTblEntry SSE41ConversionTbl[] = {
Index: test/Analysis/CostModel/X86/sitofp.ll
===================================================================
--- test/Analysis/CostModel/X86/sitofp.ll
+++ test/Analysis/CostModel/X86/sitofp.ll
@@ -264,13 +264,13 @@
   ; SSE2: cost of 40 {{.*}} sitofp
   ;
   ; AVX1-LABEL: sitofpv4i64v4double
-  ; AVX1: cost of 10 {{.*}} sitofp
+  ; AVX1: cost of 13 {{.*}} sitofp
   ;
   ; AVX2-LABEL: sitofpv4i64v4double
-  ; AVX2: cost of 10 {{.*}} sitofp
+  ; AVX2: cost of 13 {{.*}} sitofp
   ;
   ; AVX512F-LABEL: sitofpv4i64v4double
-  ; AVX512F: cost of 10 {{.*}} sitofp
+  ; AVX512F: cost of 13 {{.*}} sitofp
   %1 = sitofp <4 x i64> %a to <4 x double>
   ret <4 x double> %1
 }
@@ -280,10 +280,10 @@
   ; SSE2: cost of 80 {{.*}} sitofp
   ;
   ; AVX1-LABEL: sitofpv8i64v8double
-  ; AVX1: cost of 21 {{.*}} sitofp
+  ; AVX1: cost of 27 {{.*}} sitofp
   ;
   ; AVX2-LABEL: sitofpv8i64v8double
-  ; AVX2: cost of 21 {{.*}} sitofp
+  ; AVX2: cost of 27 {{.*}} sitofp
   ;
   ; AVX512F-LABEL: sitofpv8i64v8double
   ; AVX512F: cost of 22 {{.*}} sitofp
@@ -296,10 +296,10 @@
   ; SSE2: cost of 160 {{.*}} sitofp
   ;
   ; AVX1-LABEL: sitofpv16i64v16double
-  ; AVX1: cost of 43 {{.*}} sitofp
+  ; AVX1: cost of 55 {{.*}} sitofp
   ;
   ; AVX2-LABEL: sitofpv16i64v16double
-  ; AVX2: cost of 43 {{.*}} sitofp
+  ; AVX2: cost of 55 {{.*}} sitofp
   ;
   ; AVX512F-LABEL: sitofpv16i64v16double
   ; AVX512F: cost of 45 {{.*}} sitofp
@@ -312,10 +312,10 @@
   ; SSE2: cost of 320 {{.*}} sitofp
   ;
   ; AVX1-LABEL: sitofpv32i64v32double
-  ; AVX1: cost of 87 {{.*}} sitofp
+  ; AVX1: cost of 111 {{.*}} sitofp
   ;
   ; AVX2-LABEL: sitofpv32i64v32double
-  ; AVX2: cost of 87 {{.*}} sitofp
+  ; AVX2: cost of 111 {{.*}} sitofp
   ;
   ; AVX512F-LABEL: sitofpv32i64v32double
   ; AVX512F: cost of 91 {{.*}} sitofp
Index: test/Analysis/CostModel/X86/uitofp.ll
===================================================================
--- test/Analysis/CostModel/X86/uitofp.ll
+++ test/Analysis/CostModel/X86/uitofp.ll
@@ -169,13 +169,13 @@
   ; SSE2: cost of 20 {{.*}} uitofp
   ;
   ; AVX1-LABEL: uitofpv2i32v2double
-  ; AVX1: cost of 4 {{.*}} uitofp
+  ; AVX1: cost of 6 {{.*}} uitofp
   ;
   ; AVX2-LABEL: uitofpv2i32v2double
-  ; AVX2: cost of 4 {{.*}} uitofp
+  ; AVX2: cost of 6 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv2i32v2double
-  ; AVX512F: cost of 4 {{.*}} uitofp
+  ; AVX512F: cost of 1 {{.*}} uitofp
   %1 = uitofp <2 x i32> %a to <2 x double>
   ret <2 x double> %1
 }
@@ -249,10 +249,10 @@
   ; SSE2: cost of 20 {{.*}} uitofp
   ;
   ; AVX1-LABEL: uitofpv2i64v2double
-  ; AVX1: cost of 20 {{.*}} uitofp
+  ; AVX1: cost of 10 {{.*}} uitofp
   ;
   ; AVX2-LABEL: uitofpv2i64v2double
-  ; AVX2: cost of 20 {{.*}} uitofp
+  ; AVX2: cost of 10 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv2i64v2double
   ; AVX512F: cost of 5 {{.*}} uitofp
@@ -268,10 +268,10 @@
   ; SSE2: cost of 40 {{.*}} uitofp
   ;
   ; AVX1-LABEL: uitofpv4i64v4double
-  ; AVX1: cost of 40 {{.*}} uitofp
+  ; AVX1: cost of 20 {{.*}} uitofp
   ;
   ; AVX2-LABEL: uitofpv4i64v4double
-  ; AVX2: cost of 40 {{.*}} uitofp
+  ; AVX2: cost of 20 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv4i64v4double
   ; AVX512F: cost of 12 {{.*}} uitofp
@@ -287,10 +287,10 @@
   ; SSE2: cost of 80 {{.*}} uitofp
   ;
   ; AVX1-LABEL: uitofpv8i64v8double
-  ; AVX1: cost of 81 {{.*}} uitofp
+  ; AVX1: cost of 41 {{.*}} uitofp
   ;
   ; AVX2-LABEL: uitofpv8i64v8double
-  ; AVX2: cost of 81 {{.*}} uitofp
+  ; AVX2: cost of 41 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv8i64v8double
   ; AVX512F: cost of 26 {{.*}} uitofp
@@ -306,10 +306,10 @@
   ; SSE2: cost of 160 {{.*}} uitofp
   ;
   ; AVX1-LABEL: uitofpv16i64v16double
-  ; AVX1: cost of 163 {{.*}} uitofp
+  ; AVX1: cost of 83 {{.*}} uitofp
   ;
   ; AVX2-LABEL: uitofpv16i64v16double
-  ; AVX2: cost of 163 {{.*}} uitofp
+  ; AVX2: cost of 83 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv16i64v16double
   ; AVX512F: cost of 53 {{.*}} uitofp
@@ -325,10 +325,10 @@
   ; SSE2: cost of 320 {{.*}} uitofp
   ;
   ; AVX1-LABEL: uitofpv32i64v32double
-  ; AVX1: cost of 327 {{.*}} uitofp
+  ; AVX1: cost of 167 {{.*}} uitofp
   ;
   ; AVX2-LABEL: uitofpv32i64v32double
-  ; AVX2: cost of 327 {{.*}} uitofp
+  ; AVX2: cost of 167 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv32i64v32double
   ; AVX512F: cost of 107 {{.*}} uitofp
@@ -590,7 +590,7 @@
   ; AVX2: cost of 4 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv2i64v2float
-  ; AVX512F: cost of 4 {{.*}} uitofp
+  ; AVX512F: cost of 5 {{.*}} uitofp
   %1 = uitofp <2 x i64> %a to <2 x float>
   ret <2 x float> %1
 }
@@ -622,7 +622,7 @@
   ; AVX2: cost of 21 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv8i64v8float
-  ; AVX512F: cost of 22 {{.*}} uitofp
+  ; AVX512F: cost of 26 {{.*}} uitofp
   %1 = uitofp <8 x i64> %a to <8 x float>
   ret <8 x float> %1
 }
@@ -638,7 +638,7 @@
   ; AVX2: cost of 43 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv16i64v16float
-  ; AVX512F: cost of 45 {{.*}} uitofp
+  ; AVX512F: cost of 53 {{.*}} uitofp
   %1 = uitofp <16 x i64> %a to <16 x float>
   ret <16 x float> %1
 }
@@ -654,7 +654,7 @@
   ; AVX2: cost of 87 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv32i64v32float
-  ; AVX512F: cost of 91 {{.*}} uitofp
+  ; AVX512F: cost of 107 {{.*}} uitofp
   %1 = uitofp <32 x i64> %a to <32 x float>
   ret <32 x float> %1
 }
Index: test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll
===================================================================
--- test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll
+++ test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll
@@ -5,8 +5,8 @@
 target triple = "x86_64-apple-macosx10.8.0"
 
 
-; CHECK: cost of 20 for VF 2 For instruction: %conv = uitofp i64 %tmp to double
-; CHECK: cost of 40 for VF 4 For instruction: %conv = uitofp i64 %tmp to double
+; CHECK: cost of 10 for VF 2 For instruction: %conv = uitofp i64 %tmp to double
+; CHECK: cost of 20 for VF 4 For instruction: %conv = uitofp i64 %tmp to double
 define void @uint64_to_double_cost(i64* noalias nocapture %a, double* noalias nocapture readonly %b) nounwind {
 entry:
   br label %for.body