diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1602,10 +1602,26 @@
 
   static const TypeConversionCostTblEntry
   ConversionTbl[] = {
-    { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32,  1 },
-    { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64,  0 },
-    { ISD::TRUNCATE, MVT::v8i8,  MVT::v8i32,  3 },
-    { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
+    { ISD::TRUNCATE, MVT::v2i8,   MVT::v2i64,  1},  // xtn
+    { ISD::TRUNCATE, MVT::v2i16,  MVT::v2i64,  1},  // xtn
+    { ISD::TRUNCATE, MVT::v2i32,  MVT::v2i64,  1},  // xtn
+    { ISD::TRUNCATE, MVT::v4i8,   MVT::v4i32,  1},  // xtn
+    { ISD::TRUNCATE, MVT::v4i8,   MVT::v4i64,  3},  // 2 xtn + 1 uzp1
+    { ISD::TRUNCATE, MVT::v4i16,  MVT::v4i32,  1},  // xtn
+    { ISD::TRUNCATE, MVT::v4i16,  MVT::v4i64,  2},  // 1 uzp1 + 1 xtn
+    { ISD::TRUNCATE, MVT::v4i32,  MVT::v4i64,  1},  // 1 uzp1
+    { ISD::TRUNCATE, MVT::v8i8,   MVT::v8i16,  1},  // 1 xtn
+    { ISD::TRUNCATE, MVT::v8i8,   MVT::v8i32,  2},  // 1 uzp1 + 1 xtn
+    { ISD::TRUNCATE, MVT::v8i8,   MVT::v8i64,  4},  // 3 x uzp1 + xtn
+    { ISD::TRUNCATE, MVT::v8i16,  MVT::v8i32,  1},  // 1 uzp1
+    { ISD::TRUNCATE, MVT::v8i16,  MVT::v8i64,  3},  // 3 x uzp1
+    { ISD::TRUNCATE, MVT::v8i32,  MVT::v8i64,  2},  // 2 x uzp1
+    { ISD::TRUNCATE, MVT::v16i8,  MVT::v16i16, 1},  // uzp1
+    { ISD::TRUNCATE, MVT::v16i8,  MVT::v16i32, 3},  // (2 + 1) x uzp1
+    { ISD::TRUNCATE, MVT::v16i8,  MVT::v16i64, 7},  // (4 + 2 + 1) x uzp1
+    { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 2},  // 2 x uzp1
+    { ISD::TRUNCATE, MVT::v16i16, MVT::v16i64, 6},  // (4 + 2) x uzp1
+    { ISD::TRUNCATE, MVT::v16i32, MVT::v16i64, 4},  // 4 x uzp1
 
     // Truncations on nxvmiN
     { ISD::TRUNCATE, MVT::nxv2i1, MVT::nxv2i16, 1 },
diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-overflow.ll b/llvm/test/Analysis/CostModel/AArch64/arith-overflow.ll
--- a/llvm/test/Analysis/CostModel/AArch64/arith-overflow.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/arith-overflow.ll
@@ -359,13 +359,13 @@
 ; RECIP-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
 ; RECIP-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
-; RECIP-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; RECIP-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; RECIP-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; RECIP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
-; RECIP-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; RECIP-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; RECIP-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; RECIP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
 ; RECIP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
 ; RECIP-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
@@ -378,9 +378,9 @@
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
@@ -441,13 +441,13 @@
 ; RECIP-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
 ; RECIP-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
-; RECIP-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; RECIP-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; RECIP-NEXT:  Cost Model: Found an estimated cost of 140 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 148 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
-; RECIP-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
-; RECIP-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; RECIP-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
+; RECIP-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
 ; RECIP-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
 ; RECIP-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
@@ -460,9 +460,9 @@
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
diff --git a/llvm/test/Analysis/CostModel/AArch64/cast.ll b/llvm/test/Analysis/CostModel/AArch64/cast.ll
--- a/llvm/test/Analysis/CostModel/AArch64/cast.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/cast.ll
@@ -167,26 +167,26 @@
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s2i8i32 = trunc <2 x i32> undef to <2 x i8>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s2i8i64 = trunc <2 x i64> undef to <2 x i8>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s2i16i32 = trunc <2 x i32> undef to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s2i16i64 = trunc <2 x i64> undef to <2 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s2i32i64 = trunc <2 x i64> undef to <2 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s2i16i64 = trunc <2 x i64> undef to <2 x i16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s2i32i64 = trunc <2 x i64> undef to <2 x i32>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s4i8i16 = trunc <4 x i16> undef to <4 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s4i8i32 = trunc <4 x i32> undef to <4 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s4i8i64 = trunc <4 x i64> undef to <4 x i8>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s4i8i32 = trunc <4 x i32> undef to <4 x i8>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s4i8i64 = trunc <4 x i64> undef to <4 x i8>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s4i16i32 = trunc <4 x i32> undef to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s4i16i64 = trunc <4 x i64> undef to <4 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s4i32i64 = trunc <4 x i64> undef to <4 x i32>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s8i8i16 = trunc <8 x i16> undef to <8 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s8i8i32 = trunc <8 x i32> undef to <8 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s8i8i64 = trunc <8 x i64> undef to <8 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s8i16i32 = trunc <8 x i32> undef to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s8i16i64 = trunc <8 x i64> undef to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s8i32i64 = trunc <8 x i64> undef to <8 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s4i16i64 = trunc <4 x i64> undef to <4 x i16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s4i32i64 = trunc <4 x i64> undef to <4 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s8i8i16 = trunc <8 x i16> undef to <8 x i8>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s8i8i32 = trunc <8 x i32> undef to <8 x i8>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %s8i8i64 = trunc <8 x i64> undef to <8 x i8>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s8i16i32 = trunc <8 x i32> undef to <8 x i16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s8i16i64 = trunc <8 x i64> undef to <8 x i16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s8i32i64 = trunc <8 x i64> undef to <8 x i32>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s16i8i16 = trunc <16 x i16> undef to <16 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %s16i8i32 = trunc <16 x i32> undef to <16 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s16i8i64 = trunc <16 x i64> undef to <16 x i8>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %s16i16i32 = trunc <16 x i32> undef to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s16i16i64 = trunc <16 x i64> undef to <16 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %s16i32i64 = trunc <16 x i64> undef to <16 x i32>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s16i8i32 = trunc <16 x i32> undef to <16 x i8>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %s16i8i64 = trunc <16 x i64> undef to <16 x i8>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s16i16i32 = trunc <16 x i32> undef to <16 x i16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %s16i16i64 = trunc <16 x i64> undef to <16 x i16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %s16i32i64 = trunc <16 x i64> undef to <16 x i32>
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %r8 = trunc i8 undef to i1