Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -24777,12 +24777,14 @@
   return true;
 }
 
-bool X86TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
-  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
+bool X86TargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
+  if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
     return false;
-  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
-  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
-  return NumBits1 > NumBits2;
+  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
+  unsigned DstBits = DstTy->getPrimitiveSizeInBits();
+  return SrcBits > DstBits &&
+         (SrcBits == 64 || SrcBits == 32 || SrcBits == 16) &&
+         (DstBits == 32 || DstBits == 16 || DstBits == 8);
 }
 
 bool X86TargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
@@ -24808,12 +24810,14 @@
   return isInt<32>(Imm);
 }
 
-bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
-  if (!VT1.isInteger() || !VT2.isInteger())
+bool X86TargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
+  if (!SrcVT.isScalarInteger() || !DstVT.isScalarInteger())
     return false;
-  unsigned NumBits1 = VT1.getSizeInBits();
-  unsigned NumBits2 = VT2.getSizeInBits();
-  return NumBits1 > NumBits2;
+  unsigned SrcBits = SrcVT.getSizeInBits();
+  unsigned DstBits = DstVT.getSizeInBits();
+  return SrcBits > DstBits &&
+         (SrcBits == 64 || SrcBits == 32 || SrcBits == 16) &&
+         (DstBits == 32 || DstBits == 16 || DstBits == 8);
 }
 
 bool X86TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
Index: test/Analysis/CostModel/X86/trunc.ll
===================================================================
--- test/Analysis/CostModel/X86/trunc.ll
+++ test/Analysis/CostModel/X86/trunc.ll
@@ -45,7 +45,8 @@
   %V4i64 = trunc <4 x i64> undef to <4 x i16>
 
   ; SSE: cost of 3 {{.*}} %V8i64 = trunc
-  ; AVX: cost of 0 {{.*}} %V8i64 = trunc
+  ; AVX1: cost of 9 {{.*}} %V8i64 = trunc
+  ; AVX2: cost of 5 {{.*}} %V8i64 = trunc
   %V8i64 = trunc <8 x i64> undef to <8 x i16>
 
   ; SSE2: cost of 3 {{.*}} %V4i32 = trunc
@@ -88,7 +89,8 @@
   %V4i64 = trunc <4 x i64> undef to <4 x i8>
 
   ; SSE: cost of 3 {{.*}} %V8i64 = trunc
-  ; AVX: cost of 0 {{.*}} %V8i64 = trunc
+  ; AVX1: cost of 9 {{.*}} %V8i64 = trunc
+  ; AVX2: cost of 5 {{.*}} %V8i64 = trunc
   %V8i64 = trunc <8 x i64> undef to <8 x i8>
 
   ; SSE: cost of 0 {{.*}} %V2i32 = trunc
Index: test/CodeGen/X86/avx512-mask-op.ll
===================================================================
--- test/CodeGen/X86/avx512-mask-op.ll
+++ test/CodeGen/X86/avx512-mask-op.ll
@@ -1629,9 +1629,12 @@
 define void @f1(i32 %c) {
 ; CHECK-LABEL: f1:
 ; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: movzbl {{.*}}(%rip), %edi
-; CHECK-NEXT: xorl $1, %edi
-; CHECK-NEXT: movb %dil, {{.*}}(%rip)
+; CHECK-NEXT: movb {{.*}}(%rip), %al
+; CHECK-NEXT: xorb $1, %al
+; CHECK-NEXT: movzbl %al, %edi
+; CHECK-NEXT: andb $1, %al
+; CHECK-NEXT: movb %al, {{.*}}(%rip)
+; CHECK-NEXT: andl $1, %edi
 ; CHECK-NEXT: jmp _f2 ## TAILCALL
 entry:
   %.b1 = load i1, i1* @f1.v, align 4
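
Note (not part of the patch): a minimal standalone sketch of the width gate that both isTruncateFree overloads now share, useful for seeing which truncates are still reported as free. The helper name isTruncateFreeSketch is hypothetical and the sketch assumes only the bit widths matter, as in the patched predicate; it is not LLVM API code.

// Standalone illustration of the tightened isTruncateFree check:
// a truncate is treated as free only for scalar integer widths that
// map onto x86 subregister accesses (64/32/16-bit source narrowed to
// a 32/16/8-bit destination).
#include <cassert>

static bool isTruncateFreeSketch(unsigned SrcBits, unsigned DstBits) {
  return SrcBits > DstBits &&
         (SrcBits == 64 || SrcBits == 32 || SrcBits == 16) &&
         (DstBits == 32 || DstBits == 16 || DstBits == 8);
}

int main() {
  assert(isTruncateFreeSketch(64, 32));   // i64 -> i32: free (read the 32-bit subreg)
  assert(isTruncateFreeSketch(32, 8));    // i32 -> i8:  free
  assert(!isTruncateFreeSketch(32, 1));   // i32 -> i1:  no longer reported as free
  assert(!isTruncateFreeSketch(128, 64)); // i128 src is not one of the gated widths
  return 0;
}

The i32 -> i1 case is the one exercised by the avx512-mask-op.ll change above: with i1 destinations excluded, the stored boolean is masked with andb/andl instead of being folded into a supposedly free truncate.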