Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -31752,9 +31752,12 @@
   EVT InVT = Op0.getValueType();
   EVT InSVT = InVT.getScalarType();
 
+  // SINT_TO_FP(vXi1) -> SINT_TO_FP(SEXT(vXi1 to vXi32))
   // SINT_TO_FP(vXi8) -> SINT_TO_FP(SEXT(vXi8 to vXi32))
   // SINT_TO_FP(vXi16) -> SINT_TO_FP(SEXT(vXi16 to vXi32))
-  if (InVT.isVector() && (InSVT == MVT::i8 || InSVT == MVT::i16)) {
+  if (InVT.isVector() &&
+      (InSVT == MVT::i8 || InSVT == MVT::i16 ||
+       (InSVT == MVT::i1 && !DAG.getTargetLoweringInfo().isTypeLegal(InVT)))) {
     SDLoc dl(N);
     EVT DstVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
                                  InVT.getVectorNumElements());
Index: llvm/trunk/test/CodeGen/X86/avx512-cvt.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-cvt.ll
+++ llvm/trunk/test/CodeGen/X86/avx512-cvt.ll
@@ -836,8 +836,6 @@
 ; KNL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
 ; KNL-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
 ; KNL-NEXT:    vpmovqd %zmm0, %ymm0
-; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
-; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
 ; KNL-NEXT:    vcvtdq2pd %xmm0, %ymm0
 ; KNL-NEXT:    retq
 ;
@@ -860,21 +858,8 @@
 ; KNL-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
 ; KNL-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; KNL-NEXT:    vpsllq $32, %xmm0, %xmm0
-; KNL-NEXT:    vpsrad $31, %xmm0, %xmm1
-; KNL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; KNL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; KNL-NEXT:    vpextrq $1, %xmm0, %rax
-; KNL-NEXT:    xorl %ecx, %ecx
-; KNL-NEXT:    testb $1, %al
-; KNL-NEXT:    movl $-1, %eax
-; KNL-NEXT:    movl $0, %edx
-; KNL-NEXT:    cmovnel %eax, %edx
-; KNL-NEXT:    vcvtsi2ssl %edx, %xmm2, %xmm1
-; KNL-NEXT:    vmovq %xmm0, %rdx
-; KNL-NEXT:    testb $1, %dl
-; KNL-NEXT:    cmovnel %eax, %ecx
-; KNL-NEXT:    vcvtsi2ssl %ecx, %xmm2, %xmm0
-; KNL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; KNL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
+; KNL-NEXT:    vcvtdq2ps %xmm0, %xmm0
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: sitofp_2i1_float:
Index: llvm/trunk/test/CodeGen/X86/sse-fsignum.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/sse-fsignum.ll
+++ llvm/trunk/test/CodeGen/X86/sse-fsignum.ll
@@ -33,59 +33,19 @@
 }
 
 define void @signum64a(<2 x double>*) {
-; AVX1-LABEL: signum64a:
-; AVX1:       # BB#0: # %entry
-; AVX1-NEXT:    vmovapd (%rdi), %xmm0
-; AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT:    vcmpltpd %xmm1, %xmm0, %xmm2
-; AVX1-NEXT:    vpextrq $1, %xmm2, %rax
-; AVX1-NEXT:    vmovq %xmm2, %rcx
-; AVX1-NEXT:    vmovd %ecx, %xmm2
-; AVX1-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
-; AVX1-NEXT:    vcvtdq2pd %xmm2, %xmm2
-; AVX1-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
-; AVX1-NEXT:    vmovq %xmm0, %rcx
-; AVX1-NEXT:    vmovd %ecx, %xmm0
-; AVX1-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
-; AVX1-NEXT:    vcvtdq2pd %xmm0, %xmm0
-; AVX1-NEXT:    vsubpd %xmm0, %xmm2, %xmm0
-; AVX1-NEXT:    vmovapd %xmm0, (%rdi)
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: signum64a:
-; AVX2:       # BB#0: # %entry
-; AVX2-NEXT:    vmovapd (%rdi), %xmm0
-; AVX2-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
-; AVX2-NEXT:    vcmpltpd %xmm1, %xmm0, %xmm2
-; AVX2-NEXT:    vpextrq $1, %xmm2, %rax
-; AVX2-NEXT:    vmovq %xmm2, %rcx
-; AVX2-NEXT:    vmovd %ecx, %xmm2
-; AVX2-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
-; AVX2-NEXT:    vcvtdq2pd %xmm2, %xmm2
-; AVX2-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
-; AVX2-NEXT:    vpextrq $1, %xmm0, %rax
-; AVX2-NEXT:    vmovq %xmm0, %rcx
-; AVX2-NEXT:    vmovd %ecx, %xmm0
-; AVX2-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
-; AVX2-NEXT:    vcvtdq2pd %xmm0, %xmm0
-; AVX2-NEXT:    vsubpd %xmm0, %xmm2, %xmm0
-; AVX2-NEXT:    vmovapd %xmm0, (%rdi)
-; AVX2-NEXT:    retq
-;
-; AVX512F-LABEL: signum64a:
-; AVX512F:       # BB#0: # %entry
-; AVX512F-NEXT:    vmovapd (%rdi), %xmm0
-; AVX512F-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT:    vcmpltpd %xmm1, %xmm0, %xmm2
-; AVX512F-NEXT:    vpermilps {{.*#+}} xmm2 = xmm2[0,2,2,3]
-; AVX512F-NEXT:    vcvtdq2pd %xmm2, %xmm2
-; AVX512F-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
-; AVX512F-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512F-NEXT:    vcvtdq2pd %xmm0, %xmm0
-; AVX512F-NEXT:    vsubpd %xmm0, %xmm2, %xmm0
-; AVX512F-NEXT:    vmovapd %xmm0, (%rdi)
-; AVX512F-NEXT:    retq
+; AVX-LABEL: signum64a:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vmovapd (%rdi), %xmm0
+; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vcmpltpd %xmm1, %xmm0, %xmm2
+; AVX-NEXT:    vpermilps {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; AVX-NEXT:    vcvtdq2pd %xmm2, %xmm2
+; AVX-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
+; AVX-NEXT:    vsubpd %xmm0, %xmm2, %xmm0
+; AVX-NEXT:    vmovapd %xmm0, (%rdi)
+; AVX-NEXT:    retq
 entry:
   %1 = load <2 x double>, <2 x double>* %0
   %2 = fcmp olt <2 x double> %1, zeroinitializer
@@ -107,24 +67,8 @@
 ; AVX1-NEXT:    vmovaps (%rdi), %ymm0
 ; AVX1-NEXT:    vxorps %ymm1, %ymm1, %ymm1
 ; AVX1-NEXT:    vcmpltps %ymm1, %ymm0, %ymm2
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
-; AVX1-NEXT:    vpacksswb %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vpsllw $15, %xmm2, %xmm2
-; AVX1-NEXT:    vpsraw $15, %xmm2, %xmm2
-; AVX1-NEXT:    vpmovsxwd %xmm2, %xmm3
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
-; AVX1-NEXT:    vpmovsxwd %xmm2, %xmm2
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
 ; AVX1-NEXT:    vcvtdq2ps %ymm2, %ymm2
 ; AVX1-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpsllw $15, %xmm0, %xmm0
-; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm1
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    vcvtdq2ps %ymm0, %ymm0
 ; AVX1-NEXT:    vsubps %ymm0, %ymm2, %ymm0
 ; AVX1-NEXT:    vmovaps %ymm0, (%rdi)
@@ -136,18 +80,8 @@
 ; AVX2-NEXT:    vmovaps (%rdi), %ymm0
 ; AVX2-NEXT:    vxorps %ymm1, %ymm1, %ymm1
 ; AVX2-NEXT:    vcmpltps %ymm1, %ymm0, %ymm2
-; AVX2-NEXT:    vextractf128 $1, %ymm2, %xmm3
-; AVX2-NEXT:    vpacksswb %xmm3, %xmm2, %xmm2
-; AVX2-NEXT:    vpsllw $15, %xmm2, %xmm2
-; AVX2-NEXT:    vpsraw $15, %xmm2, %xmm2
-; AVX2-NEXT:    vpmovsxwd %xmm2, %ymm2
 ; AVX2-NEXT:    vcvtdq2ps %ymm2, %ymm2
 ; AVX2-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
-; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpsllw $15, %xmm0, %xmm0
-; AVX2-NEXT:    vpsraw $15, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
 ; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
 ; AVX2-NEXT:    vsubps %ymm0, %ymm2, %ymm0
 ; AVX2-NEXT:    vmovaps %ymm0, (%rdi)
@@ -189,14 +123,10 @@
 ; AVX1-NEXT:    vcmpltpd %ymm1, %ymm0, %ymm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
 ; AVX1-NEXT:    vpacksswb %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vpslld $31, %xmm2, %xmm2
-; AVX1-NEXT:    vpsrad $31, %xmm2, %xmm2
 ; AVX1-NEXT:    vcvtdq2pd %xmm2, %ymm2
 ; AVX1-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
-; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
 ; AVX1-NEXT:    vcvtdq2pd %xmm0, %ymm0
 ; AVX1-NEXT:    vsubpd %ymm0, %ymm2, %ymm0
 ; AVX1-NEXT:    vmovapd %ymm0, (%rdi)
@@ -210,14 +140,10 @@
 ; AVX2-NEXT:    vcmpltpd %ymm1, %ymm0, %ymm2
 ; AVX2-NEXT:    vextractf128 $1, %ymm2, %xmm3
 ; AVX2-NEXT:    vpacksswb %xmm3, %xmm2, %xmm2
-; AVX2-NEXT:    vpslld $31, %xmm2, %xmm2
-; AVX2-NEXT:    vpsrad $31, %xmm2, %xmm2
 ; AVX2-NEXT:    vcvtdq2pd %xmm2, %ymm2
 ; AVX2-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
-; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm0
 ; AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
 ; AVX2-NEXT:    vsubpd %ymm0, %ymm2, %ymm0
 ; AVX2-NEXT:    vmovapd %ymm0, (%rdi)
@@ -230,13 +156,9 @@
 ; AVX512F-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
 ; AVX512F-NEXT:    vcmpltpd %ymm1, %ymm0, %ymm2
 ; AVX512F-NEXT:    vpmovqd %zmm2, %ymm2
-; AVX512F-NEXT:    vpslld $31, %xmm2, %xmm2
-; AVX512F-NEXT:    vpsrad $31, %xmm2, %xmm2
 ; AVX512F-NEXT:    vcvtdq2pd %xmm2, %ymm2
 ; AVX512F-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
 ; AVX512F-NEXT:    vpmovqd %zmm0, %ymm0
-; AVX512F-NEXT:    vpslld $31, %xmm0, %xmm0
-; AVX512F-NEXT:    vpsrad $31, %xmm0, %xmm0
 ; AVX512F-NEXT:    vcvtdq2pd %xmm0, %ymm0
 ; AVX512F-NEXT:    vsubpd %ymm0, %ymm2, %ymm0
 ; AVX512F-NEXT:    vmovapd %ymm0, (%rdi)
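
For reference, a minimal sketch of what the updated combine does once the new i1 case matches. The hunk above only shows the guard and the start of the vXi32 type construction; the SIGN_EXTEND/SINT_TO_FP re-issue below is reconstructed from the comments in the hunk, so treat the exact node-building calls as an assumption rather than the verbatim function body:

// Sketch (assumption): SINT_TO_FP of an illegal vXi1 (or vXi8/vXi16) vector is
// handled by sign-extending the input to vXi32 first, then converting from the
// i32 lanes, i.e. SINT_TO_FP(vXi1) -> SINT_TO_FP(SEXT(vXi1 to vXi32)).
SDValue Op0 = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT InVT = Op0.getValueType();
EVT InSVT = InVT.getScalarType();
if (InVT.isVector() &&
    (InSVT == MVT::i8 || InSVT == MVT::i16 ||
     (InSVT == MVT::i1 && !DAG.getTargetLoweringInfo().isTypeLegal(InVT)))) {
  SDLoc dl(N);
  // Widen the boolean/narrow-integer input to vXi32 with a sign extend so the
  // conversion can use the native dword-to-fp instructions (cvtdq2ps/cvtdq2pd).
  EVT DstVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
                               InVT.getVectorNumElements());
  SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0);
  return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Ext);
}

The test deltas are consistent with that reading: the vpslld/vpsrad sign-bit fixups and the per-element cvtsi2ss scalarization disappear because the compare result already reaches cvtdq2ps/cvtdq2pd as a sign-extended vXi32 mask.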