Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -24777,12 +24777,14 @@
   return true;
 }
 
-bool X86TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
-  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
+bool X86TargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
+  if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
     return false;
-  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
-  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
-  return NumBits1 > NumBits2;
+  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
+  unsigned DstBits = DstTy->getPrimitiveSizeInBits();
+  return SrcBits > DstBits &&
+         (SrcBits == 64 || SrcBits == 32 || SrcBits == 16) &&
+         (DstBits == 32 || DstBits == 16 || DstBits == 8);
 }
 
 bool X86TargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
@@ -24808,12 +24810,14 @@
   return isInt<32>(Imm);
 }
 
-bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
-  if (!VT1.isInteger() || !VT2.isInteger())
+bool X86TargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
+  if (!SrcVT.isScalarInteger() || !DstVT.isScalarInteger())
     return false;
-  unsigned NumBits1 = VT1.getSizeInBits();
-  unsigned NumBits2 = VT2.getSizeInBits();
-  return NumBits1 > NumBits2;
+  unsigned SrcBits = SrcVT.getSizeInBits();
+  unsigned DstBits = DstVT.getSizeInBits();
+  return SrcBits > DstBits &&
+         (SrcBits == 64 || SrcBits == 32 || SrcBits == 16) &&
+         (DstBits == 32 || DstBits == 16 || DstBits == 8);
 }
 
 bool X86TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
Index: test/Analysis/CostModel/X86/trunc.ll
===================================================================
--- test/Analysis/CostModel/X86/trunc.ll
+++ test/Analysis/CostModel/X86/trunc.ll
@@ -45,7 +45,8 @@
   %V4i64 = trunc <4 x i64> undef to <4 x i16>
 
   ; SSE: cost of 3 {{.*}} %V8i64 = trunc
-  ; AVX: cost of 0 {{.*}} %V8i64 = trunc
+  ; AVX1: cost of 9 {{.*}} %V8i64 = trunc
+  ; AVX2: cost of 5 {{.*}} %V8i64 = trunc
   %V8i64 = trunc <8 x i64> undef to <8 x i16>
 
   ; SSE2: cost of 3 {{.*}} %V4i32 = trunc
@@ -88,7 +89,8 @@
   %V4i64 = trunc <4 x i64> undef to <4 x i8>
 
   ; SSE: cost of 3 {{.*}} %V8i64 = trunc
-  ; AVX: cost of 0 {{.*}} %V8i64 = trunc
+  ; AVX1: cost of 9 {{.*}} %V8i64 = trunc
+  ; AVX2: cost of 5 {{.*}} %V8i64 = trunc
   %V8i64 = trunc <8 x i64> undef to <8 x i8>
 
   ; SSE: cost of 0 {{.*}} %V2i32 = trunc
Index: test/CodeGen/X86/avx512-mask-op.ll
===================================================================
--- test/CodeGen/X86/avx512-mask-op.ll
+++ test/CodeGen/X86/avx512-mask-op.ll
@@ -1629,9 +1629,12 @@
 define void @f1(i32 %c) {
 ; CHECK-LABEL: f1:
 ; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: movzbl {{.*}}(%rip), %edi
-; CHECK-NEXT: xorl $1, %edi
-; CHECK-NEXT: movb %dil, {{.*}}(%rip)
+; CHECK-NEXT: movb {{.*}}(%rip), %al
+; CHECK-NEXT: xorb $1, %al
+; CHECK-NEXT: movzbl %al, %edi
+; CHECK-NEXT: andb $1, %al
+; CHECK-NEXT: movb %al, {{.*}}(%rip)
+; CHECK-NEXT: andl $1, %edi
 ; CHECK-NEXT: jmp _f2 ## TAILCALL
 entry:
   %.b1 = load i1, i1* @f1.v, align 4
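
Note (not part of the patch): a minimal standalone sketch of the width gate that both isTruncateFree overloads now share, useful for seeing which truncates are still reported as free. The helper name isTruncateFreeSketch is hypothetical and the sketch assumes only the bit widths matter, as in the patched predicate; it is not LLVM API code.

// Standalone illustration of the tightened isTruncateFree check:
// a truncate is treated as free only for scalar integer widths that
// map onto x86 subregister accesses (64/32/16-bit source narrowed to
// a 32/16/8-bit destination).
#include <cassert>

static bool isTruncateFreeSketch(unsigned SrcBits, unsigned DstBits) {
  return SrcBits > DstBits &&
         (SrcBits == 64 || SrcBits == 32 || SrcBits == 16) &&
         (DstBits == 32 || DstBits == 16 || DstBits == 8);
}

int main() {
  assert(isTruncateFreeSketch(64, 32));   // i64 -> i32: free (read the 32-bit subreg)
  assert(isTruncateFreeSketch(32, 8));    // i32 -> i8:  free
  assert(!isTruncateFreeSketch(32, 1));   // i32 -> i1:  no longer reported as free
  assert(!isTruncateFreeSketch(128, 64)); // i128 src is not one of the gated widths
  return 0;
}

The i32 -> i1 case is the one exercised by the avx512-mask-op.ll change above: with i1 destinations excluded, the stored boolean is masked with andb/andl instead of being folded into a supposedly free truncate.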