# Patch summary (descriptive preamble; everything from the first "Index:" header onward is unchanged).
#
# Files touched:
#   lib/Target/X86/X86TargetTransformInfo.cpp
#   test/Analysis/CostModel/X86/sse-itoi.ll
#
# Hunk 1 (old line 709): adds six entries at the top of SSE41ConversionTbl for
#   ZERO_EXTEND / SIGN_EXTEND into MVT::v4i64 from MVT::v4i32, MVT::v4i16 and
#   MVT::v4i8. Every added entry carries cost 2.
#
# Hunk 2 (old line 759): adds entries for the same six conversions with costs
#   v4i32: zext 3 / sext 5, v4i16: zext 3 / sext 10, v4i8: zext 4 / sext 8.
#   NOTE(review): the declaration of the table receiving these entries is not
#   visible in this hunk; the values equal what the SSE2-prefixed checks in the
#   test expect, so it is presumably the SSE2 conversion table -- confirm.
#
# Test hunk: adds six functions (zext/sext of <4 x i8>, <4 x i16>, <4 x i32> to
#   <4 x i64>). Each one loads the narrow vector, extends it and stores the
#   result; the reported cost of the extend is checked under both RUN lines
#   (-mattr=+sse2 and -mattr=+sse4.1).
#
# NOTE(review): in this copy the patch's line breaks appear to have been
#   collapsed (all hunks sit on two physical lines), so it presumably needs its
#   original line structure restored before a patch tool will accept it.
Index: lib/Target/X86/X86TargetTransformInfo.cpp =================================================================== --- lib/Target/X86/X86TargetTransformInfo.cpp +++ lib/Target/X86/X86TargetTransformInfo.cpp @@ -709,6 +709,13 @@ }; static const TypeConversionCostTblEntry SSE41ConversionTbl[] = { + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 2 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 2 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 2 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 2 }, + { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 4 }, { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 4 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 }, @@ -759,6 +766,13 @@ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 3 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 5 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 10 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 4 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 8 }, + { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 6 }, { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 8 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 3 }, Index: test/Analysis/CostModel/X86/sse-itoi.ll =================================================================== --- test/Analysis/CostModel/X86/sse-itoi.ll +++ test/Analysis/CostModel/X86/sse-itoi.ll @@ -1,6 +1,85 @@ ; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+sse2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s ; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+sse4.1 -cost-model -analyze < %s | FileCheck --check-prefix=SSE41 %s +define void @zext_v4i8_to_v4i64(<4 x i8>* %a) { +; SSE2: zext_v4i8_to_v4i64 +; SSE2: cost of 4 {{.*}} zext +; +; SSE41: zext_v4i8_to_v4i64 +; SSE41: cost of 2 {{.*}} zext +; + %1 = load <4 x i8>, <4 x i8>* %a + %2 = 
zext <4 x i8> %1 to <4 x i64> + store <4 x i64> %2, <4 x i64>* undef, align 4 + ret void +} + +define void @sext_v4i8_to_v4i64(<4 x i8>* %a) { +; SSE2: sext_v4i8_to_v4i64 +; SSE2: cost of 8 {{.*}} sext +; +; SSE41: sext_v4i8_to_v4i64 +; SSE41: cost of 2 {{.*}} sext +; + %1 = load <4 x i8>, <4 x i8>* %a + %2 = sext <4 x i8> %1 to <4 x i64> + store <4 x i64> %2, <4 x i64>* undef, align 4 + ret void +} + +define void @zext_v4i16_to_v4i64(<4 x i16>* %a) { +; SSE2: zext_v4i16_to_v4i64 +; SSE2: cost of 3 {{.*}} zext +; +; SSE41: zext_v4i16_to_v4i64 +; SSE41: cost of 2 {{.*}} zext +; + %1 = load <4 x i16>, <4 x i16>* %a + %2 = zext <4 x i16> %1 to <4 x i64> + store <4 x i64> %2, <4 x i64>* undef, align 4 + ret void +} + +define void @sext_v4i16_to_v4i64(<4 x i16>* %a) { +; SSE2: sext_v4i16_to_v4i64 +; SSE2: cost of 10 {{.*}} sext +; +; SSE41: sext_v4i16_to_v4i64 +; SSE41: cost of 2 {{.*}} sext +; + %1 = load <4 x i16>, <4 x i16>* %a + %2 = sext <4 x i16> %1 to <4 x i64> + store <4 x i64> %2, <4 x i64>* undef, align 4 + ret void +} + + +define void @zext_v4i32_to_v4i64(<4 x i32>* %a) { +; SSE2: zext_v4i32_to_v4i64 +; SSE2: cost of 3 {{.*}} zext +; +; SSE41: zext_v4i32_to_v4i64 +; SSE41: cost of 2 {{.*}} zext +; + %1 = load <4 x i32>, <4 x i32>* %a + %2 = zext <4 x i32> %1 to <4 x i64> + store <4 x i64> %2, <4 x i64>* undef, align 4 + ret void +} + +define void @sext_v4i32_to_v4i64(<4 x i32>* %a) { +; SSE2: sext_v4i32_to_v4i64 +; SSE2: cost of 5 {{.*}} sext +; +; SSE41: sext_v4i32_to_v4i64 +; SSE41: cost of 2 {{.*}} sext +; + %1 = load <4 x i32>, <4 x i32>* %a + %2 = sext <4 x i32> %1 to <4 x i64> + store <4 x i64> %2, <4 x i64>* undef, align 4 + ret void +} + define void @zext_v16i16_to_v16i32(<16 x i16>* %a) { ; SSE2: zext_v16i16_to_v16i32 ; SSE2: cost of 6 {{.*}} zext