Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2843,10 +2843,14 @@
     Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
     if (Tmp == 1) return 1;  // Early out.
     return std::min(Tmp, Tmp2)-1;
-  case ISD::TRUNCATE:
-    // FIXME: it's tricky to do anything useful for this, but it is an important
-    // case for targets like X86.
+  case ISD::TRUNCATE: {
+    // Check if the sign bits of source go down as far as the truncated value.
+    unsigned NumSrcBits = Op.getOperand(0).getScalarValueSizeInBits();
+    unsigned NumSrcSignBits = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+    if (NumSrcSignBits > (NumSrcBits - VTBits))
+      return NumSrcSignBits - (NumSrcBits - VTBits);
     break;
+  }
   case ISD::EXTRACT_ELEMENT: {
     const int KnownSign = ComputeNumSignBits(Op.getOperand(0), Depth+1);
     const int BitWidth = Op.getValueSizeInBits();
Index: test/CodeGen/X86/avx512-mask-op.ll
===================================================================
--- test/CodeGen/X86/avx512-mask-op.ll
+++ test/CodeGen/X86/avx512-mask-op.ll
@@ -246,12 +246,8 @@
 ; KNL:       ## BB#0:
 ; KNL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
 ; KNL-NEXT:    vpmovqd %zmm0, %ymm0
-; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
-; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
 ; KNL-NEXT:    vpcmpgtq %ymm3, %ymm2, %ymm1
 ; KNL-NEXT:    vpmovqd %zmm1, %ymm1
-; KNL-NEXT:    vpslld $31, %xmm1, %xmm1
-; KNL-NEXT:    vpsrad $31, %xmm1, %xmm1
 ; KNL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
 ; KNL-NEXT:    retq
 ;
Index: test/CodeGen/X86/psubus.ll
===================================================================
--- test/CodeGen/X86/psubus.ll
+++ test/CodeGen/X86/psubus.ll
@@ -601,16 +601,13 @@
 ; AVX1-NEXT:    vxorps %xmm3, %xmm0, %xmm3
 ; AVX1-NEXT:    vpcmpgtd %xmm6, %xmm3, %xmm3
 ; AVX1-NEXT:    vpacksswb %xmm4, %xmm3, %xmm3
-; AVX1-NEXT:    vpsllw $15, %xmm3, %xmm3
-; AVX1-NEXT:    vpsraw $15, %xmm3, %xmm3
 ; AVX1-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    vpsubd %xmm5, %xmm2, %xmm1
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
 ; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpandn %xmm0, %xmm3, %xmm0
 ; AVX1-NEXT:    vmovdqu %xmm0, (%rdi)
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -625,13 +622,10 @@
 ; AVX2-NEXT:    vpcmpgtd %ymm3, %ymm2, %ymm2
 ; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
 ; AVX2-NEXT:    vpacksswb %xmm3, %xmm2, %xmm2
-; AVX2-NEXT:    vpsllw $15, %xmm2, %xmm2
-; AVX2-NEXT:    vpsraw $15, %xmm2, %xmm2
 ; AVX2-NEXT:    vpsubd %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpandn %xmm0, %xmm2, %xmm0
 ; AVX2-NEXT:    vmovdqu %xmm0, (%rdi)
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -984,8 +978,6 @@
 ; AVX1-NEXT:    vxorps %xmm3, %xmm0, %xmm3
 ; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm6, %xmm3
 ; AVX1-NEXT:    vpacksswb %xmm4, %xmm3, %xmm3
-; AVX1-NEXT:    vpsllw $15, %xmm3, %xmm3
-; AVX1-NEXT:    vpsraw $15, %xmm3, %xmm3
 ; AVX1-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    vpsubd %xmm5, %xmm2, %xmm1
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
@@ -1007,8 +999,6 @@
 ; AVX2-NEXT:    vpcmpgtd %ymm3, %ymm2, %ymm2
 ; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
 ; AVX2-NEXT:    vpacksswb %xmm3, %xmm2, %xmm2
-; AVX2-NEXT:    vpsllw $15, %xmm2, %xmm2
-; AVX2-NEXT:    vpsraw $15, %xmm2, %xmm2
 ; AVX2-NEXT:    vpsubd %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
@@ -1118,8 +1108,6 @@
 ; AVX1-NEXT:    vxorps %xmm3, %xmm0, %xmm3
 ; AVX1-NEXT:    vpcmpgtd %xmm3, %xmm6, %xmm3
 ; AVX1-NEXT:    vpacksswb %xmm4, %xmm3, %xmm3
-; AVX1-NEXT:    vpsllw $15, %xmm3, %xmm3
-; AVX1-NEXT:    vpsraw $15, %xmm3, %xmm3
 ; AVX1-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    vpsubd %xmm5, %xmm2, %xmm1
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
@@ -1141,8 +1129,6 @@
 ; AVX2-NEXT:    vpcmpgtd %ymm3, %ymm2, %ymm2
 ; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
 ; AVX2-NEXT:    vpacksswb %xmm3, %xmm2, %xmm2
-; AVX2-NEXT:    vpsllw $15, %xmm2, %xmm2
-; AVX2-NEXT:    vpsraw $15, %xmm2, %xmm2
 ; AVX2-NEXT:    vpsubd %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
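
Note: as a sanity check of the arithmetic used in the new ISD::TRUNCATE case, the following minimal standalone C++ sketch (not LLVM code; countSignBits() and the sample values are illustrative assumptions) shows that a truncate keeps NumSrcSignBits - (NumSrcBits - VTBits) sign bits whenever the source has more sign bits than the truncate discards:

// Standalone illustration of the TRUNCATE sign-bit computation above.
#include <cassert>
#include <cstdint>

// Number of leading bits equal to the sign bit (the sign bit itself counts).
static unsigned countSignBits(int32_t V, unsigned Bits) {
  uint32_t U = static_cast<uint32_t>(V);
  unsigned SignBit = (U >> (Bits - 1)) & 1;
  unsigned N = 1;
  for (int I = static_cast<int>(Bits) - 2;
       I >= 0 && ((U >> I) & 1) == SignBit; --I)
    ++N;
  return N;
}

int main() {
  const unsigned NumSrcBits = 32; // width of the source type (i32)
  const unsigned VTBits = 16;     // width of the truncated type (i16)

  int32_t Src = -3805;            // 0xFFFFF123: 20 leading sign bits
  unsigned NumSrcSignBits = countSignBits(Src, NumSrcBits);
  assert(NumSrcSignBits == 20);

  // Same computation as the patch: truncation drops (NumSrcBits - VTBits)
  // copies of the sign bit; whatever remains is still known to be sign bits.
  unsigned TruncSignBits = 1;     // every value has at least one sign bit
  if (NumSrcSignBits > NumSrcBits - VTBits)
    TruncSignBits = NumSrcSignBits - (NumSrcBits - VTBits);

  int16_t Trunc = static_cast<int16_t>(Src); // 0xF123
  assert(countSignBits(Trunc, VTBits) >= TruncSignBits); // 4 >= 4
  return 0;
}

In the tests this is why the vpslld/vpsrad and vpsllw/vpsraw pairs disappear: the truncated compare results are already known to consist entirely of sign bits, so the sign-extend-in-register patterns (and the vpblendvb select on an all-ones/all-zeros mask) fold away.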