Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1980,10 +1980,6 @@
 /// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We
 /// use this predicate to simplify operations downstream.
 bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
-  // This predicate is not safe for vector operations.
-  if (Op.getValueType().isVector())
-    return false;
-
   unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
   return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth);
 }
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -13900,15 +13900,15 @@
   SDLoc dl(Op);
   auto PtrVT = getPointerTy(DAG.getDataLayout());
 
-  if (Op.getSimpleValueType().isVector())
-    return lowerUINT_TO_FP_vec(Op, DAG);
-
   // Since UINT_TO_FP is legal (it's marked custom), dag combiner won't
   // optimize it to a SINT_TO_FP when the sign bit is known zero. Perform
   // the optimization here.
   if (DAG.SignBitIsZero(N0))
     return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), N0);
 
+  if (Op.getSimpleValueType().isVector())
+    return lowerUINT_TO_FP_vec(Op, DAG);
+
   MVT SrcVT = N0.getSimpleValueType();
   MVT DstVT = Op.getSimpleValueType();
@@ -31242,6 +31242,12 @@
     return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P);
   }
 
+  // Since UINT_TO_FP is legal (it's marked custom), dag combiner won't
+  // optimize it to a SINT_TO_FP when the sign bit is known zero. Perform
+  // the optimization here.
+  if (DAG.SignBitIsZero(Op0))
+    return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, Op0);
+
   return SDValue();
 }
Index: test/CodeGen/X86/avx512-cvt.ll
===================================================================
--- test/CodeGen/X86/avx512-cvt.ll
+++ test/CodeGen/X86/avx512-cvt.ll
@@ -924,7 +924,7 @@
 ; ALL-LABEL: uitofp_16i8:
 ; ALL: ## BB#0:
 ; ALL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; ALL-NEXT: vcvtudq2ps %zmm0, %zmm0
+; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0
 ; ALL-NEXT: retq
   %b = uitofp <16 x i8> %a to <16 x float>
   ret <16 x float>%b
@@ -934,7 +934,7 @@
 ; ALL-LABEL: uitofp_16i16:
 ; ALL: ## BB#0:
 ; ALL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
-; ALL-NEXT: vcvtudq2ps %zmm0, %zmm0
+; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0
 ; ALL-NEXT: retq
   %b = uitofp <16 x i16> %a to <16 x float>
   ret <16 x float>%b
@@ -1036,9 +1036,8 @@
 ; KNL: ## BB#0:
 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; KNL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; KNL-NEXT: vpsrld $31, %xmm0, %xmm0
-; KNL-NEXT: vcvtudq2ps %zmm0, %zmm0
-; KNL-NEXT: ## kill: %XMM0 %XMM0 %ZMM0
+; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
+; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
 ; KNL-NEXT: retq
 ;
 ; SKX-LABEL: uitofp_4i1_float:
@@ -1059,8 +1058,7 @@
 ; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; KNL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
 ; KNL-NEXT: vpsrld $31, %xmm0, %xmm0
-; KNL-NEXT: vcvtudq2pd %ymm0, %zmm0
-; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0
+; KNL-NEXT: vcvtdq2pd %xmm0, %ymm0
 ; KNL-NEXT: retq
 ;
 ; SKX-LABEL: uitofp_4i1_double:
@@ -1113,12 +1111,7 @@
 ; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
 ; KNL-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
-; KNL-NEXT: vpsrlq $63, %xmm0, %xmm0
-; KNL-NEXT: vpextrq $1, %xmm0, %rax
-; KNL-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm1
-; KNL-NEXT: vmovq %xmm0, %rax
-; KNL-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm0
-; KNL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; KNL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
 ; KNL-NEXT: retq
 ;
 ; SKX-LABEL: uitofp_2i1_double:
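For reference, the invariant the new SignBitIsZero-based combine relies on can
be checked in isolation: when the sign bit of a lane is known to be zero,
unsigned and signed integer-to-float conversion produce the same value, so
UINT_TO_FP can safely be rewritten as SINT_TO_FP. A minimal standalone C++
sketch of the 32-bit case (illustration only, not part of the patch):

  // If the sign bit of x is clear, converting x as unsigned and as signed
  // yields the same float; this is what justifies folding UINT_TO_FP to
  // SINT_TO_FP once MaskedValueIsZero proves the sign bit is zero.
  #include <cassert>
  #include <cstdint>

  int main() {
    const uint32_t Vals[] = {0u, 1u, 255u, 65535u, 0x7fffffffu}; // bit 31 clear
    for (uint32_t X : Vals)
      assert(static_cast<float>(X) ==
             static_cast<float>(static_cast<int32_t>(X)));
    return 0;
  }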
Index: test/CodeGen/X86/i64-to-float.ll
===================================================================
--- test/CodeGen/X86/i64-to-float.ll
+++ test/CodeGen/X86/i64-to-float.ll
@@ -79,66 +79,67 @@
 define <2 x double> @mask_uitofp_2i64_2f64(<2 x i64> %a) nounwind {
 ; X32-SSE-LABEL: mask_uitofp_2i64_2f64:
 ; X32-SSE: # BB#0:
+; X32-SSE-NEXT: pushl %ebp
+; X32-SSE-NEXT: movl %esp, %ebp
+; X32-SSE-NEXT: andl $-8, %esp
+; X32-SSE-NEXT: subl $32, %esp
 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT: movdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
-; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
-; X32-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X32-SSE-NEXT: movapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
-; X32-SSE-NEXT: subpd %xmm3, %xmm0
-; X32-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
-; X32-SSE-NEXT: addpd %xmm4, %xmm0
-; X32-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; X32-SSE-NEXT: subpd %xmm3, %xmm2
-; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
-; X32-SSE-NEXT: addpd %xmm2, %xmm1
-; X32-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE-NEXT: movq {{.*#+}} xmm1 = xmm0[0],zero
+; X32-SSE-NEXT: movq %xmm1, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; X32-SSE-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: fildll {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: fstpl {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: fildll {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: fstpl (%esp)
+; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X32-SSE-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; X32-SSE-NEXT: movl %ebp, %esp
+; X32-SSE-NEXT: popl %ebp
 ; X32-SSE-NEXT: retl
 ;
 ; X32-AVX-LABEL: mask_uitofp_2i64_2f64:
 ; X32-AVX: # BB#0:
+; X32-AVX-NEXT: pushl %ebp
+; X32-AVX-NEXT: movl %esp, %ebp
+; X32-AVX-NEXT: andl $-8, %esp
+; X32-AVX-NEXT: subl $32, %esp
 ; X32-AVX-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
-; X32-AVX-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X32-AVX-NEXT: vmovapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
-; X32-AVX-NEXT: vsubpd %xmm3, %xmm2, %xmm2
-; X32-AVX-NEXT: vhaddpd %xmm2, %xmm2, %xmm2
+; X32-AVX-NEXT: vmovq {{.*#+}} xmm1 = xmm0[0],zero
+; X32-AVX-NEXT: vmovq %xmm1, {{[0-9]+}}(%esp)
 ; X32-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; X32-AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X32-AVX-NEXT: vsubpd %xmm3, %xmm0, %xmm0
-; X32-AVX-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
-; X32-AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; X32-AVX-NEXT: vmovq %xmm0, {{[0-9]+}}(%esp)
+; X32-AVX-NEXT: fildll {{[0-9]+}}(%esp)
+; X32-AVX-NEXT: fstpl {{[0-9]+}}(%esp)
+; X32-AVX-NEXT: fildll {{[0-9]+}}(%esp)
+; X32-AVX-NEXT: fstpl (%esp)
+; X32-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X32-AVX-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; X32-AVX-NEXT: movl %ebp, %esp
+; X32-AVX-NEXT: popl %ebp
 ; X32-AVX-NEXT: retl
 ;
 ; X64-SSE-LABEL: mask_uitofp_2i64_2f64:
 ; X64-SSE: # BB#0:
 ; X64-SSE-NEXT: pand {{.*}}(%rip), %xmm0
-; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
-; X64-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X64-SSE-NEXT: movapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
-; X64-SSE-NEXT: subpd %xmm3, %xmm0
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
-; X64-SSE-NEXT: addpd %xmm4, %xmm0
-; X64-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; X64-SSE-NEXT: subpd %xmm3, %xmm2
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
-; X64-SSE-NEXT: addpd %xmm2, %xmm1
-; X64-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-SSE-NEXT: movd %xmm0, %rax
+; X64-SSE-NEXT: cvtsi2sdq %rax, %xmm1
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; X64-SSE-NEXT: movd %xmm0, %rax
+; X64-SSE-NEXT: xorps %xmm0, %xmm0
+; X64-SSE-NEXT: cvtsi2sdq %rax, %xmm0
+; X64-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; X64-SSE-NEXT: movapd %xmm1, %xmm0
 ; X64-SSE-NEXT: retq
 ;
 ; X64-AVX-LABEL: mask_uitofp_2i64_2f64:
 ; X64-AVX: # BB#0:
 ; X64-AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
-; X64-AVX-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X64-AVX-NEXT: vmovapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
-; X64-AVX-NEXT: vsubpd %xmm3, %xmm2, %xmm2
-; X64-AVX-NEXT: vhaddpd %xmm2, %xmm2, %xmm2
-; X64-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; X64-AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X64-AVX-NEXT: vsubpd %xmm3, %xmm0, %xmm0
-; X64-AVX-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
-; X64-AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; X64-AVX-NEXT: vpextrq $1, %xmm0, %rax
+; X64-AVX-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1
+; X64-AVX-NEXT: vmovq %xmm0, %rax
+; X64-AVX-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0
+; X64-AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X64-AVX-NEXT: retq
   %and = and <2 x i64> %a,
   %cvt = uitofp <2 x i64> %and to <2 x double>
@@ -271,51 +272,27 @@
 ; X32-SSE-NEXT: subl $48, %esp
 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm1
-; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
-; X32-SSE-NEXT: movq %xmm2, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
-; X32-SSE-NEXT: movq %xmm2, {{[0-9]+}}(%esp)
 ; X32-SSE-NEXT: movq %xmm1, {{[0-9]+}}(%esp)
 ; X32-SSE-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,1,2,3]
-; X32-SSE-NEXT: movd %xmm2, %eax
-; X32-SSE-NEXT: xorl %ecx, %ecx
-; X32-SSE-NEXT: testl %eax, %eax
-; X32-SSE-NEXT: setns %cl
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; X32-SSE-NEXT: movq %xmm1, {{[0-9]+}}(%esp)
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; X32-SSE-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
 ; X32-SSE-NEXT: fildll {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: fadds {{\.LCPI.*}}(,%ecx,4)
 ; X32-SSE-NEXT: fstps {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,1,2,3]
-; X32-SSE-NEXT: movd %xmm2, %eax
-; X32-SSE-NEXT: xorl %ecx, %ecx
-; X32-SSE-NEXT: testl %eax, %eax
-; X32-SSE-NEXT: setns %cl
 ; X32-SSE-NEXT: fildll {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: fadds {{\.LCPI.*}}(,%ecx,4)
 ; X32-SSE-NEXT: fstps (%esp)
-; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
-; X32-SSE-NEXT: movd %xmm1, %eax
-; X32-SSE-NEXT: xorl %ecx, %ecx
-; X32-SSE-NEXT: testl %eax, %eax
-; X32-SSE-NEXT: setns %cl
+; X32-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; X32-SSE-NEXT: fildll {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: fadds {{\.LCPI.*}}(,%ecx,4)
 ; X32-SSE-NEXT: fstps {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X32-SSE-NEXT: movd %xmm0, %eax
-; X32-SSE-NEXT: xorl %ecx, %ecx
-; X32-SSE-NEXT: testl %eax, %eax
-; X32-SSE-NEXT: setns %cl
 ; X32-SSE-NEXT: fildll {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: fadds {{\.LCPI.*}}(,%ecx,4)
 ; X32-SSE-NEXT: fstps {{[0-9]+}}(%esp)
-; X32-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X32-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; X32-SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X32-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
 ; X32-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; X32-SSE-NEXT: movl %ebp, %esp
 ; X32-SSE-NEXT: popl %ebp
 ; X32-SSE-NEXT: retl
@@ -324,57 +301,37 @@
 ; X32-AVX: # BB#0:
 ; X32-AVX-NEXT: pushl %ebp
 ; X32-AVX-NEXT: movl %esp, %ebp
-; X32-AVX-NEXT: pushl %ebx
-; X32-AVX-NEXT: pushl %esi
 ; X32-AVX-NEXT: andl $-8, %esp
 ; X32-AVX-NEXT: subl $48, %esp
 ; X32-AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
 ; X32-AVX-NEXT: vpextrd $1, %xmm0, %eax
 ; X32-AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm1
 ; X32-AVX-NEXT: vmovq %xmm1, {{[0-9]+}}(%esp)
-; X32-AVX-NEXT: vpextrd $3, %xmm0, %ecx
+; X32-AVX-NEXT: vpextrd $3, %xmm0, %eax
 ; X32-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X32-AVX-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
+; X32-AVX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
 ; X32-AVX-NEXT: vmovq %xmm1, {{[0-9]+}}(%esp)
 ; X32-AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
-; X32-AVX-NEXT: vpextrd $1, %xmm0, %edx
-; X32-AVX-NEXT: vpinsrd $1, %edx, %xmm0, %xmm1
+; X32-AVX-NEXT: vpextrd $1, %xmm0, %eax
+; X32-AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm1
 ; X32-AVX-NEXT: vmovq %xmm1, {{[0-9]+}}(%esp)
-; X32-AVX-NEXT: vpextrd $3, %xmm0, %esi
+; X32-AVX-NEXT: vpextrd $3, %xmm0, %eax
 ; X32-AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; X32-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
+; X32-AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
 ; X32-AVX-NEXT: vmovq %xmm0, {{[0-9]+}}(%esp)
-; X32-AVX-NEXT: xorl %ebx, %ebx
-; X32-AVX-NEXT: testl %eax, %eax
-; X32-AVX-NEXT: setns %bl
 ; X32-AVX-NEXT: fildll {{[0-9]+}}(%esp)
-; X32-AVX-NEXT: fadds {{\.LCPI.*}}(,%ebx,4)
-; X32-AVX-NEXT: fstps (%esp)
-; X32-AVX-NEXT: xorl %eax, %eax
-; X32-AVX-NEXT: testl %ecx, %ecx
-; X32-AVX-NEXT: setns %al
-; X32-AVX-NEXT: fildll {{[0-9]+}}(%esp)
-; X32-AVX-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
 ; X32-AVX-NEXT: fstps {{[0-9]+}}(%esp)
-; X32-AVX-NEXT: xorl %eax, %eax
-; X32-AVX-NEXT: testl %edx, %edx
-; X32-AVX-NEXT: setns %al
 ; X32-AVX-NEXT: fildll {{[0-9]+}}(%esp)
-; X32-AVX-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
 ; X32-AVX-NEXT: fstps {{[0-9]+}}(%esp)
-; X32-AVX-NEXT: xorl %eax, %eax
-; X32-AVX-NEXT: testl %esi, %esi
-; X32-AVX-NEXT: setns %al
 ; X32-AVX-NEXT: fildll {{[0-9]+}}(%esp)
-; X32-AVX-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
 ; X32-AVX-NEXT: fstps {{[0-9]+}}(%esp)
+; X32-AVX-NEXT: fildll {{[0-9]+}}(%esp)
+; X32-AVX-NEXT: fstps (%esp)
 ; X32-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X32-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
 ; X32-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
 ; X32-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
-; X32-AVX-NEXT: leal -8(%ebp), %esp
-; X32-AVX-NEXT: popl %esi
-; X32-AVX-NEXT: popl %ebx
+; X32-AVX-NEXT: movl %ebp, %esp
 ; X32-AVX-NEXT: popl %ebp
 ; X32-AVX-NEXT: vzeroupper
 ; X32-AVX-NEXT: retl
@@ -384,68 +341,18 @@
 ; X64-SSE-NEXT: pand {{.*}}(%rip), %xmm0
 ; X64-SSE-NEXT: pand {{.*}}(%rip), %xmm1
 ; X64-SSE-NEXT: movd %xmm1, %rax
-; X64-SSE-NEXT: testq %rax, %rax
-; X64-SSE-NEXT: js .LBB3_1
-; X64-SSE-NEXT: # BB#2:
 ; X64-SSE-NEXT: cvtsi2ssq %rax, %xmm3
-; X64-SSE-NEXT: jmp .LBB3_3
-; X64-SSE-NEXT: .LBB3_1:
-; X64-SSE-NEXT: movq %rax, %rcx
-; X64-SSE-NEXT: shrq %rcx
-; X64-SSE-NEXT: andl $1, %eax
-; X64-SSE-NEXT: orq %rcx, %rax
-; X64-SSE-NEXT: cvtsi2ssq %rax, %xmm3
-; X64-SSE-NEXT: addss %xmm3, %xmm3
-; X64-SSE-NEXT: .LBB3_3:
 ; X64-SSE-NEXT: movd %xmm0, %rax
-; X64-SSE-NEXT: testq %rax, %rax
-; X64-SSE-NEXT: js .LBB3_4
-; X64-SSE-NEXT: # BB#5:
-; X64-SSE-NEXT: cvtsi2ssq %rax, %xmm2
-; X64-SSE-NEXT: jmp .LBB3_6
-; X64-SSE-NEXT: .LBB3_4:
-; X64-SSE-NEXT: movq %rax, %rcx
-; X64-SSE-NEXT: shrq %rcx
-; X64-SSE-NEXT: andl $1, %eax
-; X64-SSE-NEXT: orq %rcx, %rax
 ; X64-SSE-NEXT: cvtsi2ssq %rax, %xmm2
-; X64-SSE-NEXT: addss %xmm2, %xmm2
-; X64-SSE-NEXT: .LBB3_6:
+; X64-SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
 ; X64-SSE-NEXT: movd %xmm1, %rax
-; X64-SSE-NEXT: testq %rax, %rax
-; X64-SSE-NEXT: js .LBB3_7
-; X64-SSE-NEXT: # BB#8:
-; X64-SSE-NEXT: xorps %xmm1, %xmm1
-; X64-SSE-NEXT: cvtsi2ssq %rax, %xmm1
-; X64-SSE-NEXT: jmp .LBB3_9
-; X64-SSE-NEXT: .LBB3_7:
-; X64-SSE-NEXT: movq %rax, %rcx
-; X64-SSE-NEXT: shrq %rcx
-; X64-SSE-NEXT: andl $1, %eax
-; X64-SSE-NEXT: orq %rcx, %rax
 ; X64-SSE-NEXT: xorps %xmm1, %xmm1
 ; X64-SSE-NEXT: cvtsi2ssq %rax, %xmm1
-; X64-SSE-NEXT: addss %xmm1, %xmm1
-; X64-SSE-NEXT: .LBB3_9:
-; X64-SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
 ; X64-SSE-NEXT: movd %xmm0, %rax
-; X64-SSE-NEXT: testq %rax, %rax
-; X64-SSE-NEXT: js .LBB3_10
-; X64-SSE-NEXT: # BB#11:
-; X64-SSE-NEXT: xorps %xmm0, %xmm0
-; X64-SSE-NEXT: cvtsi2ssq %rax, %xmm0
-; X64-SSE-NEXT: jmp .LBB3_12
-; X64-SSE-NEXT: .LBB3_10:
-; X64-SSE-NEXT: movq %rax, %rcx
-; X64-SSE-NEXT: shrq %rcx
-; X64-SSE-NEXT: andl $1, %eax
-; X64-SSE-NEXT: orq %rcx, %rax
 ; X64-SSE-NEXT: xorps %xmm0, %xmm0
 ; X64-SSE-NEXT: cvtsi2ssq %rax, %xmm0
-; X64-SSE-NEXT: addss %xmm0, %xmm0
-; X64-SSE-NEXT: .LBB3_12:
 ; X64-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; X64-SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
 ; X64-SSE-NEXT: movaps %xmm2, %xmm0
@@ -455,65 +362,16 @@
 ; X64-AVX: # BB#0:
 ; X64-AVX-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
 ; X64-AVX-NEXT: vpextrq $1, %xmm0, %rax
-; X64-AVX-NEXT: testq %rax, %rax
-; X64-AVX-NEXT: js .LBB3_1
-; X64-AVX-NEXT: # BB#2:
 ; X64-AVX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
-; X64-AVX-NEXT: jmp .LBB3_3
-; X64-AVX-NEXT: .LBB3_1:
-; X64-AVX-NEXT: movq %rax, %rcx
-; X64-AVX-NEXT: shrq %rcx
-; X64-AVX-NEXT: andl $1, %eax
-; X64-AVX-NEXT: orq %rcx, %rax
-; X64-AVX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
-; X64-AVX-NEXT: vaddss %xmm1, %xmm1, %xmm1
-; X64-AVX-NEXT: .LBB3_3:
 ; X64-AVX-NEXT: vmovq %xmm0, %rax
-; X64-AVX-NEXT: testq %rax, %rax
-; X64-AVX-NEXT: js .LBB3_4
-; X64-AVX-NEXT: # BB#5:
-; X64-AVX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
-; X64-AVX-NEXT: jmp .LBB3_6
-; X64-AVX-NEXT: .LBB3_4:
-; X64-AVX-NEXT: movq %rax, %rcx
-; X64-AVX-NEXT: shrq %rcx
-; X64-AVX-NEXT: andl $1, %eax
-; X64-AVX-NEXT: orq %rcx, %rax
 ; X64-AVX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
-; X64-AVX-NEXT: vaddss %xmm2, %xmm2, %xmm2
-; X64-AVX-NEXT: .LBB3_6:
 ; X64-AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
 ; X64-AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; X64-AVX-NEXT: vmovq %xmm0, %rax
-; X64-AVX-NEXT: testq %rax, %rax
-; X64-AVX-NEXT: js .LBB3_7
-; X64-AVX-NEXT: # BB#8:
 ; X64-AVX-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
-; X64-AVX-NEXT: jmp .LBB3_9
-; X64-AVX-NEXT: .LBB3_7:
-; X64-AVX-NEXT: movq %rax, %rcx
-; X64-AVX-NEXT: shrq %rcx
-; X64-AVX-NEXT: andl $1, %eax
-; X64-AVX-NEXT: orq %rcx, %rax
-; X64-AVX-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
-; X64-AVX-NEXT: vaddss %xmm2, %xmm2, %xmm2
-; X64-AVX-NEXT: .LBB3_9:
 ; X64-AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
 ; X64-AVX-NEXT: vpextrq $1, %xmm0, %rax
-; X64-AVX-NEXT: testq %rax, %rax
-; X64-AVX-NEXT: js .LBB3_10
-; X64-AVX-NEXT: # BB#11:
-; X64-AVX-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
-; X64-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; X64-AVX-NEXT: vzeroupper
-; X64-AVX-NEXT: retq
-; X64-AVX-NEXT: .LBB3_10:
-; X64-AVX-NEXT: movq %rax, %rcx
-; X64-AVX-NEXT: shrq %rcx
-; X64-AVX-NEXT: andl $1, %eax
-; X64-AVX-NEXT: orq %rcx, %rax
 ; X64-AVX-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
-; X64-AVX-NEXT: vaddss %xmm0, %xmm0, %xmm0
 ; X64-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
 ; X64-AVX-NEXT: vzeroupper
 ; X64-AVX-NEXT: retq
Index: test/CodeGen/X86/uint_to_fp-3.ll
===================================================================
--- test/CodeGen/X86/uint_to_fp-3.ll
+++ test/CodeGen/X86/uint_to_fp-3.ll
@@ -9,46 +9,26 @@
 define <4 x float> @mask_ucvt_4i32_4f32(<4 x i32> %a) {
 ; X32-SSE-LABEL: mask_ucvt_4i32_4f32:
 ; X32-SSE: # BB#0:
-; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
-; X32-SSE-NEXT: pand %xmm0, %xmm1
-; X32-SSE-NEXT: por {{\.LCPI.*}}, %xmm1
-; X32-SSE-NEXT: psrld $16, %xmm0
-; X32-SSE-NEXT: por {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT: addps {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT: addps %xmm1, %xmm0
+; X32-SSE-NEXT: andps {{\.LCPI.*}}, %xmm0
+; X32-SSE-NEXT: cvtdq2ps %xmm0, %xmm0
 ; X32-SSE-NEXT: retl
 ;
 ; X32-AVX-LABEL: mask_ucvt_4i32_4f32:
 ; X32-AVX: # BB#0:
-; X32-AVX-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
-; X32-AVX-NEXT: vpsrld $16, %xmm0, %xmm0
-; X32-AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
-; X32-AVX-NEXT: vaddps {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-AVX-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; X32-AVX-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0
+; X32-AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
 ; X32-AVX-NEXT: retl
 ;
 ; X64-SSE-LABEL: mask_ucvt_4i32_4f32:
 ; X64-SSE: # BB#0:
-; X64-SSE-NEXT: pand {{.*}}(%rip), %xmm0
-; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
-; X64-SSE-NEXT: pand %xmm0, %xmm1
-; X64-SSE-NEXT: por {{.*}}(%rip), %xmm1
-; X64-SSE-NEXT: psrld $16, %xmm0
-; X64-SSE-NEXT: por {{.*}}(%rip), %xmm0
-; X64-SSE-NEXT: addps {{.*}}(%rip), %xmm0
-; X64-SSE-NEXT: addps %xmm1, %xmm0
+; X64-SSE-NEXT: andps {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT: cvtdq2ps %xmm0, %xmm0
 ; X64-SSE-NEXT: retq
 ;
 ; X64-AVX-LABEL: mask_ucvt_4i32_4f32:
 ; X64-AVX: # BB#0:
-; X64-AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm0
-; X64-AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
-; X64-AVX-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; X64-AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
 ; X64-AVX-NEXT: retq
   %and = and <4 x i32> %a,
   %cvt = uitofp <4 x i32> %and to <4 x float>
@@ -58,91 +38,32 @@
 define <4 x double> @mask_ucvt_4i32_4f64(<4 x i32> %a) {
 ; X32-SSE-LABEL: mask_ucvt_4i32_4f64:
 ; X32-SSE: # BB#0:
-; X32-SSE-NEXT: movdqa %xmm0, %xmm2
-; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm2
-; X32-SSE-NEXT: pxor %xmm3, %xmm3
-; X32-SSE-NEXT: movdqa %xmm2, %xmm0
-; X32-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; X32-SSE-NEXT: movdqa {{.*#+}} xmm4 = [1127219200,1160773632,0,0]
-; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X32-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
-; X32-SSE-NEXT: movapd {{.*#+}} xmm5 = [4.503600e+15,1.934281e+25]
-; X32-SSE-NEXT: subpd %xmm5, %xmm0
-; X32-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm0[2,3,0,1]
-; X32-SSE-NEXT: addpd %xmm6, %xmm0
-; X32-SSE-NEXT: xorpd %xmm6, %xmm6
-; X32-SSE-NEXT: movss {{.*#+}} xmm6 = xmm1[0],xmm6[1,2,3]
-; X32-SSE-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1]
-; X32-SSE-NEXT: subpd %xmm5, %xmm6
-; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm6[2,3,0,1]
-; X32-SSE-NEXT: addpd %xmm6, %xmm1
-; X32-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; X32-SSE-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm3[2],xmm2[3],xmm3[3]
-; X32-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm2[2,3,0,1]
-; X32-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
-; X32-SSE-NEXT: subpd %xmm5, %xmm2
-; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
-; X32-SSE-NEXT: addpd %xmm2, %xmm1
-; X32-SSE-NEXT: movss {{.*#+}} xmm3 = xmm6[0],xmm3[1,2,3]
-; X32-SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
-; X32-SSE-NEXT: subpd %xmm5, %xmm3
-; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
-; X32-SSE-NEXT: addpd %xmm3, %xmm2
-; X32-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
+; X32-SSE-NEXT: cvtdq2pd %xmm0, %xmm2
+; X32-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; X32-SSE-NEXT: cvtdq2pd %xmm0, %xmm1
+; X32-SSE-NEXT: movaps %xmm2, %xmm0
 ; X32-SSE-NEXT: retl
 ;
 ; X32-AVX-LABEL: mask_ucvt_4i32_4f64:
 ; X32-AVX: # BB#0:
-; X32-AVX-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-AVX-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm1
-; X32-AVX-NEXT: vcvtdq2pd %xmm1, %ymm1
-; X32-AVX-NEXT: vpsrld $16, %xmm0, %xmm0
+; X32-AVX-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0
 ; X32-AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
-; X32-AVX-NEXT: vmulpd {{\.LCPI.*}}, %ymm0, %ymm0
-; X32-AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
 ; X32-AVX-NEXT: retl
 ;
 ; X64-SSE-LABEL: mask_ucvt_4i32_4f64:
 ; X64-SSE: # BB#0:
-; X64-SSE-NEXT: movdqa %xmm0, %xmm2
-; X64-SSE-NEXT: pand {{.*}}(%rip), %xmm2
-; X64-SSE-NEXT: pxor %xmm1, %xmm1
-; X64-SSE-NEXT: movdqa %xmm2, %xmm0
-; X64-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X64-SSE-NEXT: movdqa {{.*#+}} xmm3 = [1127219200,1160773632,0,0]
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
-; X64-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; X64-SSE-NEXT: movapd {{.*#+}} xmm5 = [4.503600e+15,1.934281e+25]
-; X64-SSE-NEXT: subpd %xmm5, %xmm0
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm0[2,3,0,1]
-; X64-SSE-NEXT: addpd %xmm6, %xmm0
-; X64-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
-; X64-SSE-NEXT: subpd %xmm5, %xmm4
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm4[2,3,0,1]
-; X64-SSE-NEXT: addpd %xmm4, %xmm6
-; X64-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm6[0]
-; X64-SSE-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,0,1]
-; X64-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
-; X64-SSE-NEXT: subpd %xmm5, %xmm2
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
-; X64-SSE-NEXT: addpd %xmm2, %xmm1
-; X64-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
-; X64-SSE-NEXT: subpd %xmm5, %xmm4
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm4[2,3,0,1]
-; X64-SSE-NEXT: addpd %xmm4, %xmm2
-; X64-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X64-SSE-NEXT: pand {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT: cvtdq2pd %xmm0, %xmm2
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; X64-SSE-NEXT: cvtdq2pd %xmm0, %xmm1
+; X64-SSE-NEXT: movaps %xmm2, %xmm0
 ; X64-SSE-NEXT: retq
 ;
 ; X64-AVX-LABEL: mask_ucvt_4i32_4f64:
 ; X64-AVX: # BB#0:
-; X64-AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; X64-AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm1
-; X64-AVX-NEXT: vcvtdq2pd %xmm1, %ymm1
-; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm0
+; X64-AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
-; X64-AVX-NEXT: vmulpd {{.*}}(%rip), %ymm0, %ymm0
-; X64-AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
 ; X64-AVX-NEXT: retq
   %and = and <4 x i32> %a,
   %cvt = uitofp <4 x i32> %and to <4 x double>
Index: test/CodeGen/X86/vec_int_to_fp.ll
===================================================================
--- test/CodeGen/X86/vec_int_to_fp.ll
+++ test/CodeGen/X86/vec_int_to_fp.ll
@@ -575,7 +575,7 @@
 ; AVX512-LABEL: uitofp_8i16_to_2f64:
 ; AVX512: # BB#0:
 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX512-NEXT: vcvtudq2pd %ymm0, %zmm0
+; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0
 ; AVX512-NEXT: # kill: %XMM0 %XMM0 %ZMM0
 ; AVX512-NEXT: retq
   %cvt = uitofp <8 x i16> %a to <8 x double>
@@ -630,7 +630,7 @@
 ; AVX512-LABEL: uitofp_16i8_to_2f64:
 ; AVX512: # BB#0:
 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512-NEXT: vcvtudq2pd %ymm0, %zmm0
+; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0
 ; AVX512-NEXT: # kill: %XMM0 %XMM0 %ZMM0
 ; AVX512-NEXT: retq
   %cvt = uitofp <16 x i8> %a to <16 x double>
@@ -804,17 +804,11 @@
 ; SSE-NEXT: movaps %xmm2, %xmm0
 ; SSE-NEXT: retq
 ;
-; VEX-LABEL: uitofp_4i16_to_4f64:
-; VEX: # BB#0:
-; VEX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; VEX-NEXT: vcvtdq2pd %xmm0, %ymm0
-; VEX-NEXT: retq
-;
-; AVX512-LABEL: uitofp_4i16_to_4f64:
-; AVX512: # BB#0:
-; AVX512-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX512-NEXT: vcvtudq2pd %xmm0, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: uitofp_4i16_to_4f64:
+; AVX: # BB#0:
+; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
+; AVX-NEXT: retq
   %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32>
   %cvt = uitofp <4 x i16> %shuf to <4 x double>
   ret <4 x double> %cvt
@@ -846,7 +840,7 @@
 ; AVX512-LABEL: uitofp_8i16_to_4f64:
 ; AVX512: # BB#0:
 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX512-NEXT: vcvtudq2pd %ymm0, %zmm0
+; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0
 ; AVX512-NEXT: # kill: %YMM0 %YMM0 %ZMM0
 ; AVX512-NEXT: retq
   %cvt = uitofp <8 x i16> %a to <8 x double>
@@ -866,17 +860,11 @@
 ; SSE-NEXT: movaps %xmm2, %xmm0
 ; SSE-NEXT: retq
 ;
-; VEX-LABEL: uitofp_4i8_to_4f64:
-; VEX: # BB#0:
-; VEX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; VEX-NEXT: vcvtdq2pd %xmm0, %ymm0
-; VEX-NEXT: retq
-;
-; AVX512-LABEL: uitofp_4i8_to_4f64:
-; AVX512: # BB#0:
-; AVX512-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; AVX512-NEXT: vcvtudq2pd %xmm0, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: uitofp_4i8_to_4f64:
+; AVX: # BB#0:
+; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
+; AVX-NEXT: retq
   %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32>
   %cvt = uitofp <4 x i8> %shuf to <4 x double>
   ret <4 x double> %cvt
@@ -909,7 +897,7 @@
 ; AVX512-LABEL: uitofp_16i8_to_4f64:
 ; AVX512: # BB#0:
 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512-NEXT: vcvtudq2pd %ymm0, %zmm0
+; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0
 ; AVX512-NEXT: # kill: %YMM0 %YMM0 %ZMM0
 ; AVX512-NEXT: retq
   %cvt = uitofp <16 x i8> %a to <16 x double>
@@ -1572,17 +1560,11 @@
 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
 ; SSE-NEXT: retq
 ;
-; VEX-LABEL: uitofp_4i16_to_4f32:
-; VEX: # BB#0:
-; VEX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; VEX-NEXT: vcvtdq2ps %xmm0, %xmm0
-; VEX-NEXT: retq
-;
-; AVX512-LABEL: uitofp_4i16_to_4f32:
-; AVX512: # BB#0:
-; AVX512-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX512-NEXT: vcvtudq2ps %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: uitofp_4i16_to_4f32:
+; AVX: # BB#0:
+; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
+; AVX-NEXT: retq
   %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32>
   %cvt = uitofp <4 x i16> %shuf to <4 x float>
   ret <4 x float> %cvt
@@ -1618,7 +1600,7 @@
 ; AVX512-LABEL: uitofp_8i16_to_4f32:
 ; AVX512: # BB#0:
 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX512-NEXT: vcvtudq2ps %ymm0, %ymm0
+; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
 ; AVX512-NEXT: # kill: %XMM0 %XMM0 %YMM0
 ; AVX512-NEXT: retq
   %cvt = uitofp <8 x i16> %a to <8 x float>
@@ -1635,17 +1617,11 @@
 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
 ; SSE-NEXT: retq
 ;
-; VEX-LABEL: uitofp_4i8_to_4f32:
-; VEX: # BB#0:
-; VEX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; VEX-NEXT: vcvtdq2ps %xmm0, %xmm0
-; VEX-NEXT: retq
-;
-; AVX512-LABEL: uitofp_4i8_to_4f32:
-; AVX512: # BB#0:
-; AVX512-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; AVX512-NEXT: vcvtudq2ps %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: uitofp_4i8_to_4f32:
+; AVX: # BB#0:
+; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
+; AVX-NEXT: retq
   %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32>
   %cvt = uitofp <4 x i8> %shuf to <4 x float>
   ret <4 x float> %cvt
@@ -1682,7 +1658,7 @@
 ; AVX512-LABEL: uitofp_16i8_to_4f32:
 ; AVX512: # BB#0:
 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512-NEXT: vcvtudq2ps %zmm0, %zmm0
+; AVX512-NEXT: vcvtdq2ps %zmm0, %zmm0
 ; AVX512-NEXT: # kill: %XMM0 %XMM0 %ZMM0
 ; AVX512-NEXT: retq
   %cvt = uitofp <16 x i8> %a to <16 x float>
@@ -1997,7 +1973,7 @@
 ; AVX512-LABEL: uitofp_8i16_to_8f32:
 ; AVX512: # BB#0:
 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX512-NEXT: vcvtudq2ps %ymm0, %ymm0
+; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
 ; AVX512-NEXT: retq
   %cvt = uitofp <8 x i16> %a to <8 x float>
   ret <8 x float> %cvt
@@ -2034,7 +2010,7 @@
 ; AVX512-LABEL: uitofp_8i8_to_8f32:
 ; AVX512: # BB#0:
 ; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
-; AVX512-NEXT: vcvtudq2ps %ymm0, %ymm0
+; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
 ; AVX512-NEXT: retq
   %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32>
   %cvt = uitofp <8 x i8> %shuf to <8 x float>
@@ -2072,7 +2048,7 @@
 ; AVX512-LABEL: uitofp_16i8_to_8f32:
 ; AVX512: # BB#0:
 ; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512-NEXT: vcvtudq2ps %zmm0, %zmm0
+; AVX512-NEXT: vcvtdq2ps %zmm0, %zmm0
 ; AVX512-NEXT: # kill: %YMM0 %YMM0 %ZMM0
 ; AVX512-NEXT: retq
   %cvt = uitofp <16 x i8> %a to <16 x float>
@@ -2660,17 +2636,11 @@
 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
 ; SSE-NEXT: retq
 ;
-; VEX-LABEL: uitofp_load_4i16_to_4f64:
-; VEX: # BB#0:
-; VEX-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
-; VEX-NEXT: vcvtdq2pd %xmm0, %ymm0
-; VEX-NEXT: retq
-;
-; AVX512-LABEL: uitofp_load_4i16_to_4f64:
-; AVX512: # BB#0:
-; AVX512-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
-; AVX512-NEXT: vcvtudq2pd %xmm0, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: uitofp_load_4i16_to_4f64:
+; AVX: # BB#0:
+; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
+; AVX-NEXT: retq
   %ld = load <4 x i16>, <4 x i16> *%a
   %cvt = uitofp <4 x i16> %ld to <4 x double>
   ret <4 x double> %cvt
@@ -2688,17 +2658,11 @@
 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
 ; SSE-NEXT: retq
 ;
-; VEX-LABEL: uitofp_load_4i8_to_4f64:
-; VEX: # BB#0:
-; VEX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
-; VEX-NEXT: vcvtdq2pd %xmm0, %ymm0
-; VEX-NEXT: retq
-;
-; AVX512-LABEL: uitofp_load_4i8_to_4f64:
-; AVX512: # BB#0:
-; AVX512-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
-; AVX512-NEXT: vcvtudq2pd %xmm0, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: uitofp_load_4i8_to_4f64:
+; AVX: # BB#0:
+; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
+; AVX-NEXT: retq
   %ld = load <4 x i8>, <4 x i8> *%a
   %cvt = uitofp <4 x i8> %ld to <4 x double>
   ret <4 x double> %cvt
@@ -3358,17 +3322,11 @@
 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
 ; SSE-NEXT: retq
 ;
-; VEX-LABEL: uitofp_load_4i16_to_4f32:
-; VEX: # BB#0:
-; VEX-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
-; VEX-NEXT: vcvtdq2ps %xmm0, %xmm0
-; VEX-NEXT: retq
-;
-; AVX512-LABEL: uitofp_load_4i16_to_4f32:
-; AVX512: # BB#0:
-; AVX512-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
-; AVX512-NEXT: vcvtudq2ps %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: uitofp_load_4i16_to_4f32:
+; AVX: # BB#0:
+; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
+; AVX-NEXT: retq
   %ld = load <4 x i16>, <4 x i16> *%a
   %cvt = uitofp <4 x i16> %ld to <4 x float>
   ret <4 x float> %cvt
@@ -3384,17 +3342,11 @@
 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
 ; SSE-NEXT: retq
 ;
-; VEX-LABEL: uitofp_load_4i8_to_4f32:
-; VEX: # BB#0:
-; VEX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
-; VEX-NEXT: vcvtdq2ps %xmm0, %xmm0
-; VEX-NEXT: retq
-;
-; AVX512-LABEL: uitofp_load_4i8_to_4f32:
-; AVX512: # BB#0:
-; AVX512-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
-; AVX512-NEXT: vcvtudq2ps %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: uitofp_load_4i8_to_4f32:
+; AVX: # BB#0:
+; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
+; AVX-NEXT: retq
   %ld = load <4 x i8>, <4 x i8> *%a
   %cvt = uitofp <4 x i8> %ld to <4 x float>
   ret <4 x float> %cvt
@@ -3917,7 +3869,7 @@
 ; AVX512-LABEL: uitofp_load_8i16_to_8f32:
 ; AVX512: # BB#0:
 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
-; AVX512-NEXT: vcvtudq2ps %ymm0, %ymm0
+; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
 ; AVX512-NEXT: retq
   %ld = load <8 x i16>, <8 x i16> *%a
   %cvt = uitofp <8 x i16> %ld to <8 x float>
@@ -3954,7 +3906,7 @@
 ; AVX512-LABEL: uitofp_load_8i8_to_8f32:
 ; AVX512: # BB#0:
 ; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
-; AVX512-NEXT: vcvtudq2ps %ymm0, %ymm0
+; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
 ; AVX512-NEXT: retq
   %ld = load <8 x i8>, <8 x i8> *%a
   %cvt = uitofp <8 x i8> %ld to <8 x float>
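The i64-to-float.ll diffs above rely on the 64-bit form of the same fact: once
the AND masks each lane down to a small range, bit 63 is known zero, so the
plain signed cvtsi2sdq/fildll paths produce the unsigned result and the branchy
unsigned-conversion expansion disappears. A standalone C++ sketch of that
invariant (illustration only, not part of the patch):

  #include <cassert>
  #include <cstdint>

  int main() {
    // With bit 63 clear (as after masking with a small constant), the signed
    // conversion performed by cvtsi2sdq/fildll equals the unsigned conversion.
    const uint64_t Vals[] = {0u, 255u, 65535u, (1ull << 62)}; // bit 63 clear
    for (uint64_t X : Vals)
      assert(static_cast<double>(X) ==
             static_cast<double>(static_cast<int64_t>(X)));
    return 0;
  }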