diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -330,7 +330,7 @@ if (Subtarget.hasSSE2()) { // Custom lowering for saturating float to int conversions. // We handle promotion to larger result types manually. - for (MVT VT : { MVT::i8, MVT::i16, MVT::i32 }) { + for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::v4i32}) { setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom); setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom); } @@ -23468,10 +23468,12 @@ EVT SrcVT = Src.getValueType(); EVT DstVT = Node->getValueType(0); EVT TmpVT = DstVT; + EVT SetCCVT = DAG.getTargetLoweringInfo().getSetCCResultType( + DAG.getDataLayout(), *DAG.getContext(), SrcVT); // This code is only for floats and doubles. Fall back to generic code for // anything else. - if (!isScalarFPTypeInSSEReg(SrcVT) || isSoftFP16(SrcVT)) + if (!isScalarFPTypeInSSEReg(SrcVT.getScalarType()) || isSoftFP16(SrcVT)) return SDValue(); EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); @@ -23483,14 +23485,16 @@ // Promote result of FP_TO_*INT to at least 32 bits. if (TmpWidth < 32) { - TmpVT = MVT::i32; + TmpVT = + TmpVT.isVector() ? TmpVT.changeVectorElementType(MVT::i32) : MVT::i32; TmpWidth = 32; } // Promote conversions to unsigned 32-bit to 64-bit, because it will allow // us to use a native signed conversion instead. if (SatWidth == 32 && !IsSigned && Subtarget.is64Bit()) { - TmpVT = MVT::i64; + TmpVT = + TmpVT.isVector() ? TmpVT.changeVectorElementType(MVT::i64) : MVT::i64; TmpWidth = 64; } @@ -23558,8 +23562,8 @@ // Otherwise, select zero if Src is NaN. 
SDValue ZeroInt = DAG.getConstant(0, dl, DstVT); - return DAG.getSelectCC( - dl, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO); + SDValue IsNaN = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::SETUO); + return DAG.getSelect(dl, DstVT, IsNaN, ZeroInt, FpToInt); } SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT); @@ -23581,13 +23585,13 @@ if (!IsSigned || SatWidth != TmpVT.getScalarSizeInBits()) { // If Src ULT MinFloat, select MinInt. In particular, this also selects // MinInt if Src is NaN. - Select = DAG.getSelectCC( - dl, Src, MinFloatNode, MinIntNode, Select, ISD::CondCode::SETULT); + SDValue SatMin = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT); + Select = DAG.getSelect(dl, DstVT, SatMin, MinIntNode, Select); } // If Src OGT MaxFloat, select MaxInt. - Select = DAG.getSelectCC( - dl, Src, MaxFloatNode, MaxIntNode, Select, ISD::CondCode::SETOGT); + SDValue SatMax = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT); + Select = DAG.getSelect(dl, DstVT, SatMax, MaxIntNode, Select); // In the unsigned case we are done, because we mapped NaN to MinInt, which // is already zero. The promoted case was already handled above. @@ -23597,8 +23601,8 @@ // Otherwise, select 0 if Src is NaN. 
SDValue ZeroInt = DAG.getConstant(0, dl, DstVT); - return DAG.getSelectCC( - dl, Src, Src, ZeroInt, Select, ISD::CondCode::SETUO); + SDValue IsNaN = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::SETUO); + return DAG.getSelect(dl, DstVT, IsNaN, ZeroInt, Select); } SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { diff --git a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll --- a/llvm/test/CodeGen/X86/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/X86/fpclamptosat_vec.ll @@ -144,77 +144,15 @@ define <4 x i32> @stest_f32i32(<4 x float> %x) { ; CHECK-LABEL: stest_f32i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movaps %xmm0, %xmm1 -; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3] -; CHECK-NEXT: cvttss2si %xmm1, %rax -; CHECK-NEXT: movq %rax, %xmm1 -; CHECK-NEXT: movaps %xmm0, %xmm2 -; CHECK-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] -; CHECK-NEXT: cvttss2si %xmm2, %rax -; CHECK-NEXT: movq %rax, %xmm2 -; CHECK-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0] -; CHECK-NEXT: cvttss2si %xmm0, %rax -; CHECK-NEXT: movq %rax, %xmm4 -; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] -; CHECK-NEXT: cvttss2si %xmm0, %rax -; CHECK-NEXT: movq %rax, %xmm0 -; CHECK-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm0[0] -; CHECK-NEXT: movdqa {{.*#+}} xmm3 = [2147483647,2147483647] -; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648] -; CHECK-NEXT: movdqa %xmm4, %xmm1 -; CHECK-NEXT: pxor %xmm0, %xmm1 -; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,1,3,3] -; CHECK-NEXT: pxor %xmm6, %xmm6 -; CHECK-NEXT: pcmpeqd %xmm6, %xmm5 -; CHECK-NEXT: movdqa {{.*#+}} xmm7 = [4294967295,4294967295] -; CHECK-NEXT: movdqa %xmm7, %xmm8 -; CHECK-NEXT: pcmpgtd %xmm1, %xmm8 -; CHECK-NEXT: pshufd {{.*#+}} xmm9 = xmm8[0,0,2,2] -; CHECK-NEXT: pand %xmm5, %xmm9 -; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm8[1,1,3,3] -; CHECK-NEXT: por %xmm9, %xmm1 -; CHECK-NEXT: pand %xmm1, %xmm4 -; CHECK-NEXT: pandn %xmm3, %xmm1 -; CHECK-NEXT: por %xmm4, 
%xmm1 -; CHECK-NEXT: movdqa %xmm2, %xmm4 -; CHECK-NEXT: pxor %xmm0, %xmm4 -; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3] -; CHECK-NEXT: pcmpeqd %xmm6, %xmm5 -; CHECK-NEXT: pcmpgtd %xmm4, %xmm7 -; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm7[0,0,2,2] -; CHECK-NEXT: pand %xmm5, %xmm4 -; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3] -; CHECK-NEXT: por %xmm4, %xmm5 -; CHECK-NEXT: pand %xmm5, %xmm2 -; CHECK-NEXT: pandn %xmm3, %xmm5 -; CHECK-NEXT: por %xmm2, %xmm5 -; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [18446744071562067968,18446744071562067968] -; CHECK-NEXT: movdqa %xmm5, %xmm3 -; CHECK-NEXT: pxor %xmm0, %xmm3 -; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] -; CHECK-NEXT: pcmpeqd %xmm6, %xmm6 -; CHECK-NEXT: pcmpeqd %xmm6, %xmm4 -; CHECK-NEXT: movdqa {{.*#+}} xmm7 = [18446744069414584320,18446744069414584320] -; CHECK-NEXT: pcmpgtd %xmm7, %xmm3 -; CHECK-NEXT: pshufd {{.*#+}} xmm8 = xmm3[0,0,2,2] -; CHECK-NEXT: pand %xmm4, %xmm8 -; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] -; CHECK-NEXT: por %xmm8, %xmm3 -; CHECK-NEXT: pand %xmm3, %xmm5 -; CHECK-NEXT: pandn %xmm2, %xmm3 -; CHECK-NEXT: por %xmm5, %xmm3 -; CHECK-NEXT: pxor %xmm1, %xmm0 -; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3] -; CHECK-NEXT: pcmpeqd %xmm6, %xmm4 -; CHECK-NEXT: pcmpgtd %xmm7, %xmm0 -; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm0[0,0,2,2] -; CHECK-NEXT: pand %xmm4, %xmm5 -; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; CHECK-NEXT: por %xmm5, %xmm0 -; CHECK-NEXT: pand %xmm0, %xmm1 -; CHECK-NEXT: pandn %xmm2, %xmm0 -; CHECK-NEXT: por %xmm1, %xmm0 -; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2] +; CHECK-NEXT: movaps {{.*#+}} xmm1 = [2.14748352E+9,2.14748352E+9,2.14748352E+9,2.14748352E+9] +; CHECK-NEXT: cmpltps %xmm0, %xmm1 +; CHECK-NEXT: cvttps2dq %xmm0, %xmm2 +; CHECK-NEXT: movaps %xmm1, %xmm3 +; CHECK-NEXT: andnps %xmm2, %xmm3 +; CHECK-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK-NEXT: orps %xmm3, %xmm1 +; CHECK-NEXT: cmpunordps %xmm0, %xmm0 +; CHECK-NEXT: 
andnps %xmm1, %xmm0 ; CHECK-NEXT: retq entry: %conv = fptosi <4 x float> %x to <4 x i64> @@ -386,98 +324,83 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-LABEL: stest_f16i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subq $72, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 80 -; CHECK-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: subq $64, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 96 +; CHECK-NEXT: .cfi_offset %rbx, -32 +; CHECK-NEXT: .cfi_offset %r14, -24 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-NEXT: movdqa %xmm0, %xmm1 ; CHECK-NEXT: psrld $16, %xmm1 ; CHECK-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-NEXT: movdqa %xmm0, %xmm1 ; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] -; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill ; CHECK-NEXT: psrlq $48, %xmm0 ; CHECK-NEXT: callq __extendhfsf2@PLT -; CHECK-NEXT: cvttss2si %xmm0, %rax -; CHECK-NEXT: movq %rax, %xmm0 -; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; CHECK-NEXT: callq __extendhfsf2@PLT -; CHECK-NEXT: cvttss2si %xmm0, %rax -; CHECK-NEXT: movq %rax, %xmm0 -; CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload -; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] +; CHECK-NEXT: cvttss2si %xmm0, %eax +; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-NEXT: movl $-2147483648, %ebx # imm = 0x80000000 +; CHECK-NEXT: cmovbl %ebx, %eax +; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-NEXT: movl $2147483647, %ebp # imm = 0x7FFFFFFF +; CHECK-NEXT: cmoval %ebp, %eax +; CHECK-NEXT: xorl %r14d, 
%r14d +; CHECK-NEXT: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: cmovpl %r14d, %eax +; CHECK-NEXT: movd %eax, %xmm0 ; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload ; CHECK-NEXT: callq __extendhfsf2@PLT -; CHECK-NEXT: cvttss2si %xmm0, %rax -; CHECK-NEXT: movq %rax, %xmm0 +; CHECK-NEXT: cvttss2si %xmm0, %eax +; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-NEXT: cmovbl %ebx, %eax +; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-NEXT: cmoval %ebp, %eax +; CHECK-NEXT: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: cmovpl %r14d, %eax +; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill ; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; CHECK-NEXT: callq __extendhfsf2@PLT -; CHECK-NEXT: cvttss2si %xmm0, %rax -; CHECK-NEXT: movq %rax, %xmm0 -; CHECK-NEXT: movdqa (%rsp), %xmm3 # 16-byte Reload -; CHECK-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0] -; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [2147483647,2147483647] -; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648] -; CHECK-NEXT: movdqa %xmm3, %xmm1 -; CHECK-NEXT: movdqa %xmm3, %xmm8 -; CHECK-NEXT: pxor %xmm0, %xmm1 -; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3] -; CHECK-NEXT: pxor %xmm4, %xmm4 -; CHECK-NEXT: pcmpeqd %xmm4, %xmm3 -; CHECK-NEXT: movdqa {{.*#+}} xmm5 = [4294967295,4294967295] -; CHECK-NEXT: movdqa %xmm5, %xmm6 -; CHECK-NEXT: pcmpgtd %xmm1, %xmm6 -; CHECK-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2] -; CHECK-NEXT: pand %xmm3, %xmm7 -; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm6[1,1,3,3] -; CHECK-NEXT: por %xmm7, %xmm1 -; CHECK-NEXT: pand %xmm1, %xmm8 -; CHECK-NEXT: pandn %xmm2, %xmm1 -; CHECK-NEXT: por %xmm8, %xmm1 -; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload -; CHECK-NEXT: movdqa %xmm7, %xmm3 -; 
CHECK-NEXT: pxor %xmm0, %xmm3 -; CHECK-NEXT: pshufd {{.*#+}} xmm6 = xmm3[1,1,3,3] -; CHECK-NEXT: pcmpeqd %xmm4, %xmm6 -; CHECK-NEXT: pcmpgtd %xmm3, %xmm5 -; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm5[0,0,2,2] -; CHECK-NEXT: pand %xmm6, %xmm3 -; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3] -; CHECK-NEXT: por %xmm3, %xmm4 -; CHECK-NEXT: movdqa %xmm7, %xmm3 -; CHECK-NEXT: pand %xmm4, %xmm3 -; CHECK-NEXT: pandn %xmm2, %xmm4 -; CHECK-NEXT: por %xmm3, %xmm4 -; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [18446744071562067968,18446744071562067968] -; CHECK-NEXT: movdqa %xmm4, %xmm3 -; CHECK-NEXT: pxor %xmm0, %xmm3 -; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm3[1,1,3,3] -; CHECK-NEXT: pcmpeqd %xmm6, %xmm6 -; CHECK-NEXT: pcmpeqd %xmm6, %xmm5 -; CHECK-NEXT: movdqa {{.*#+}} xmm7 = [18446744069414584320,18446744069414584320] -; CHECK-NEXT: pcmpgtd %xmm7, %xmm3 -; CHECK-NEXT: pshufd {{.*#+}} xmm8 = xmm3[0,0,2,2] -; CHECK-NEXT: pand %xmm5, %xmm8 -; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] -; CHECK-NEXT: por %xmm8, %xmm3 -; CHECK-NEXT: pand %xmm3, %xmm4 -; CHECK-NEXT: pandn %xmm2, %xmm3 -; CHECK-NEXT: por %xmm4, %xmm3 -; CHECK-NEXT: pxor %xmm1, %xmm0 -; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3] -; CHECK-NEXT: pcmpeqd %xmm6, %xmm4 -; CHECK-NEXT: pcmpgtd %xmm7, %xmm0 -; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm0[0,0,2,2] -; CHECK-NEXT: pand %xmm4, %xmm5 -; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; CHECK-NEXT: por %xmm5, %xmm0 -; CHECK-NEXT: pand %xmm0, %xmm1 -; CHECK-NEXT: pandn %xmm2, %xmm0 -; CHECK-NEXT: por %xmm1, %xmm0 -; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2] -; CHECK-NEXT: addq $72, %rsp +; CHECK-NEXT: cvttss2si %xmm0, %eax +; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-NEXT: cmovbl %ebx, %eax +; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-NEXT: cmoval %ebp, %eax +; CHECK-NEXT: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: cmovpl %r14d, %eax +; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: movdqa %xmm0, 
{{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: callq __extendhfsf2@PLT +; CHECK-NEXT: cvttss2si %xmm0, %eax +; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-NEXT: cmovbl %ebx, %eax +; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-NEXT: cmoval %ebp, %eax +; CHECK-NEXT: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: cmovpl %r14d, %eax +; CHECK-NEXT: movd %eax, %xmm1 +; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; CHECK-NEXT: punpcklqdq (%rsp), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] +; CHECK-NEXT: addq $64, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: popq %rbp ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq entry: @@ -1715,77 +1638,15 @@ define <4 x i32> @stest_f32i32_mm(<4 x float> %x) { ; CHECK-LABEL: stest_f32i32_mm: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movaps %xmm0, %xmm1 -; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3] -; CHECK-NEXT: cvttss2si %xmm1, %rax -; CHECK-NEXT: movq %rax, %xmm1 -; CHECK-NEXT: movaps %xmm0, %xmm2 -; CHECK-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] -; CHECK-NEXT: cvttss2si %xmm2, %rax -; CHECK-NEXT: movq %rax, %xmm2 -; CHECK-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0] -; CHECK-NEXT: cvttss2si %xmm0, %rax -; CHECK-NEXT: movq %rax, %xmm3 -; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] -; CHECK-NEXT: cvttss2si %xmm0, %rax -; CHECK-NEXT: movq %rax, %xmm0 -; CHECK-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0] -; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648] -; CHECK-NEXT: movdqa %xmm3, %xmm1 -; CHECK-NEXT: pxor %xmm0, %xmm1 -; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3] -; CHECK-NEXT: pxor %xmm5, %xmm5 -; CHECK-NEXT: 
pcmpeqd %xmm5, %xmm4 -; CHECK-NEXT: movdqa {{.*#+}} xmm6 = [4294967295,4294967295] -; CHECK-NEXT: movdqa %xmm6, %xmm7 -; CHECK-NEXT: pcmpgtd %xmm1, %xmm7 -; CHECK-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2] -; CHECK-NEXT: pand %xmm4, %xmm8 -; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm7[1,1,3,3] -; CHECK-NEXT: por %xmm8, %xmm1 -; CHECK-NEXT: movdqa {{.*#+}} xmm4 = [2147483647,2147483647] -; CHECK-NEXT: pand %xmm1, %xmm3 -; CHECK-NEXT: pandn %xmm4, %xmm1 -; CHECK-NEXT: por %xmm3, %xmm1 -; CHECK-NEXT: movdqa %xmm2, %xmm3 -; CHECK-NEXT: pxor %xmm0, %xmm3 -; CHECK-NEXT: pshufd {{.*#+}} xmm7 = xmm3[1,1,3,3] -; CHECK-NEXT: pcmpeqd %xmm5, %xmm7 -; CHECK-NEXT: pcmpgtd %xmm3, %xmm6 -; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm6[0,0,2,2] -; CHECK-NEXT: pand %xmm7, %xmm3 -; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3] -; CHECK-NEXT: por %xmm3, %xmm5 -; CHECK-NEXT: pand %xmm5, %xmm2 -; CHECK-NEXT: pandn %xmm4, %xmm5 -; CHECK-NEXT: por %xmm2, %xmm5 -; CHECK-NEXT: movdqa %xmm5, %xmm2 -; CHECK-NEXT: pxor %xmm0, %xmm2 -; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3] -; CHECK-NEXT: pcmpeqd %xmm4, %xmm4 -; CHECK-NEXT: pcmpeqd %xmm4, %xmm3 -; CHECK-NEXT: movdqa {{.*#+}} xmm6 = [18446744069414584320,18446744069414584320] -; CHECK-NEXT: pcmpgtd %xmm6, %xmm2 -; CHECK-NEXT: pshufd {{.*#+}} xmm7 = xmm2[0,0,2,2] -; CHECK-NEXT: pand %xmm3, %xmm7 -; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] -; CHECK-NEXT: por %xmm7, %xmm2 -; CHECK-NEXT: movdqa {{.*#+}} xmm3 = [18446744071562067968,18446744071562067968] -; CHECK-NEXT: pand %xmm2, %xmm5 -; CHECK-NEXT: pandn %xmm3, %xmm2 -; CHECK-NEXT: por %xmm5, %xmm2 -; CHECK-NEXT: pxor %xmm1, %xmm0 -; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3] -; CHECK-NEXT: pcmpeqd %xmm4, %xmm5 -; CHECK-NEXT: pcmpgtd %xmm6, %xmm0 -; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,0,2,2] -; CHECK-NEXT: pand %xmm5, %xmm4 -; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; CHECK-NEXT: por %xmm4, %xmm0 -; CHECK-NEXT: pand %xmm0, %xmm1 -; CHECK-NEXT: pandn %xmm3, %xmm0 -; 
CHECK-NEXT: por %xmm1, %xmm0 -; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2] +; CHECK-NEXT: movaps {{.*#+}} xmm1 = [2.14748352E+9,2.14748352E+9,2.14748352E+9,2.14748352E+9] +; CHECK-NEXT: cmpltps %xmm0, %xmm1 +; CHECK-NEXT: cvttps2dq %xmm0, %xmm2 +; CHECK-NEXT: movaps %xmm1, %xmm3 +; CHECK-NEXT: andnps %xmm2, %xmm3 +; CHECK-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK-NEXT: orps %xmm3, %xmm1 +; CHECK-NEXT: cmpunordps %xmm0, %xmm0 +; CHECK-NEXT: andnps %xmm1, %xmm0 ; CHECK-NEXT: retq entry: %conv = fptosi <4 x float> %x to <4 x i64> @@ -1952,98 +1813,83 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-LABEL: stest_f16i32_mm: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subq $72, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 80 -; CHECK-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: subq $64, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 96 +; CHECK-NEXT: .cfi_offset %rbx, -32 +; CHECK-NEXT: .cfi_offset %r14, -24 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-NEXT: movdqa %xmm0, %xmm1 ; CHECK-NEXT: psrld $16, %xmm1 ; CHECK-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-NEXT: movdqa %xmm0, %xmm1 ; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] -; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill ; CHECK-NEXT: psrlq $48, %xmm0 ; CHECK-NEXT: callq __extendhfsf2@PLT -; CHECK-NEXT: cvttss2si %xmm0, %rax -; CHECK-NEXT: movq %rax, %xmm0 -; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; CHECK-NEXT: callq __extendhfsf2@PLT -; CHECK-NEXT: cvttss2si %xmm0, %rax -; CHECK-NEXT: movq %rax, %xmm0 -; 
CHECK-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload -; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] +; CHECK-NEXT: cvttss2si %xmm0, %eax +; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-NEXT: movl $-2147483648, %ebx # imm = 0x80000000 +; CHECK-NEXT: cmovbl %ebx, %eax +; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-NEXT: movl $2147483647, %ebp # imm = 0x7FFFFFFF +; CHECK-NEXT: cmoval %ebp, %eax +; CHECK-NEXT: xorl %r14d, %r14d +; CHECK-NEXT: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: cmovpl %r14d, %eax +; CHECK-NEXT: movd %eax, %xmm0 ; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload ; CHECK-NEXT: callq __extendhfsf2@PLT -; CHECK-NEXT: cvttss2si %xmm0, %rax -; CHECK-NEXT: movq %rax, %xmm0 +; CHECK-NEXT: cvttss2si %xmm0, %eax +; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-NEXT: cmovbl %ebx, %eax +; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-NEXT: cmoval %ebp, %eax +; CHECK-NEXT: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: cmovpl %r14d, %eax +; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill ; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; CHECK-NEXT: callq __extendhfsf2@PLT -; CHECK-NEXT: cvttss2si %xmm0, %rax -; CHECK-NEXT: movq %rax, %xmm0 -; CHECK-NEXT: movdqa (%rsp), %xmm2 # 16-byte Reload -; CHECK-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0] -; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648] -; CHECK-NEXT: movdqa %xmm2, %xmm1 -; CHECK-NEXT: movdqa %xmm2, %xmm7 -; CHECK-NEXT: pxor %xmm0, %xmm1 -; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] -; CHECK-NEXT: pxor %xmm3, %xmm3 -; CHECK-NEXT: pcmpeqd %xmm3, %xmm2 -; CHECK-NEXT: movdqa {{.*#+}} xmm4 = [4294967295,4294967295] -; CHECK-NEXT: movdqa %xmm4, 
%xmm5 -; CHECK-NEXT: pcmpgtd %xmm1, %xmm5 -; CHECK-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2] -; CHECK-NEXT: pand %xmm2, %xmm6 -; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3] -; CHECK-NEXT: por %xmm6, %xmm1 -; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [2147483647,2147483647] -; CHECK-NEXT: pand %xmm1, %xmm7 -; CHECK-NEXT: pandn %xmm2, %xmm1 -; CHECK-NEXT: por %xmm7, %xmm1 -; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload -; CHECK-NEXT: movdqa %xmm7, %xmm5 -; CHECK-NEXT: pxor %xmm0, %xmm5 -; CHECK-NEXT: pshufd {{.*#+}} xmm6 = xmm5[1,1,3,3] -; CHECK-NEXT: pcmpeqd %xmm3, %xmm6 -; CHECK-NEXT: pcmpgtd %xmm5, %xmm4 -; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,0,2,2] -; CHECK-NEXT: pand %xmm6, %xmm3 -; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3] -; CHECK-NEXT: por %xmm3, %xmm4 -; CHECK-NEXT: movdqa %xmm7, %xmm3 -; CHECK-NEXT: pand %xmm4, %xmm3 -; CHECK-NEXT: pandn %xmm2, %xmm4 -; CHECK-NEXT: por %xmm3, %xmm4 -; CHECK-NEXT: movdqa %xmm4, %xmm2 -; CHECK-NEXT: pxor %xmm0, %xmm2 -; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3] -; CHECK-NEXT: pcmpeqd %xmm5, %xmm5 -; CHECK-NEXT: pcmpeqd %xmm5, %xmm3 -; CHECK-NEXT: movdqa {{.*#+}} xmm6 = [18446744069414584320,18446744069414584320] -; CHECK-NEXT: pcmpgtd %xmm6, %xmm2 -; CHECK-NEXT: pshufd {{.*#+}} xmm7 = xmm2[0,0,2,2] -; CHECK-NEXT: pand %xmm3, %xmm7 -; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] -; CHECK-NEXT: por %xmm7, %xmm2 -; CHECK-NEXT: movdqa {{.*#+}} xmm3 = [18446744071562067968,18446744071562067968] -; CHECK-NEXT: pand %xmm2, %xmm4 -; CHECK-NEXT: pandn %xmm3, %xmm2 -; CHECK-NEXT: por %xmm4, %xmm2 -; CHECK-NEXT: pxor %xmm1, %xmm0 -; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3] -; CHECK-NEXT: pcmpeqd %xmm5, %xmm4 -; CHECK-NEXT: pcmpgtd %xmm6, %xmm0 -; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm0[0,0,2,2] -; CHECK-NEXT: pand %xmm4, %xmm5 -; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; CHECK-NEXT: por %xmm5, %xmm0 -; CHECK-NEXT: pand %xmm0, %xmm1 -; CHECK-NEXT: pandn %xmm3, %xmm0 -; 
CHECK-NEXT: por %xmm1, %xmm0 -; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2] -; CHECK-NEXT: addq $72, %rsp +; CHECK-NEXT: cvttss2si %xmm0, %eax +; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-NEXT: cmovbl %ebx, %eax +; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-NEXT: cmoval %ebp, %eax +; CHECK-NEXT: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: cmovpl %r14d, %eax +; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: callq __extendhfsf2@PLT +; CHECK-NEXT: cvttss2si %xmm0, %eax +; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-NEXT: cmovbl %ebx, %eax +; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-NEXT: cmoval %ebp, %eax +; CHECK-NEXT: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: cmovpl %r14d, %eax +; CHECK-NEXT: movd %eax, %xmm1 +; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; CHECK-NEXT: punpcklqdq (%rsp), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] +; CHECK-NEXT: addq $64, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: popq %rbp ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq entry: diff --git a/llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll b/llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll --- a/llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll +++ b/llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll @@ -16,42 +16,12 @@ ; CHECK-LABEL: test_signed_v4i1_v4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: movaps %xmm0, %xmm1 -; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3] -; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: ucomiss %xmm1, %xmm1 -; 
CHECK-NEXT: maxss %xmm2, %xmm1 -; CHECK-NEXT: xorps %xmm3, %xmm3 -; CHECK-NEXT: minss %xmm3, %xmm1 -; CHECK-NEXT: cvttss2si %xmm1, %ecx -; CHECK-NEXT: cmovpl %eax, %ecx -; CHECK-NEXT: movd %ecx, %xmm1 -; CHECK-NEXT: movaps %xmm0, %xmm4 -; CHECK-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1] -; CHECK-NEXT: ucomiss %xmm4, %xmm4 -; CHECK-NEXT: maxss %xmm2, %xmm4 -; CHECK-NEXT: minss %xmm3, %xmm4 -; CHECK-NEXT: cvttss2si %xmm4, %ecx -; CHECK-NEXT: cmovpl %eax, %ecx -; CHECK-NEXT: movd %ecx, %xmm4 -; CHECK-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] -; CHECK-NEXT: movaps %xmm0, %xmm1 -; CHECK-NEXT: maxss %xmm2, %xmm1 -; CHECK-NEXT: minss %xmm3, %xmm1 -; CHECK-NEXT: cvttss2si %xmm1, %ecx -; CHECK-NEXT: ucomiss %xmm0, %xmm0 -; CHECK-NEXT: cmovpl %eax, %ecx -; CHECK-NEXT: movd %ecx, %xmm1 -; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] -; CHECK-NEXT: ucomiss %xmm0, %xmm0 -; CHECK-NEXT: maxss %xmm2, %xmm0 -; CHECK-NEXT: minss %xmm3, %xmm0 -; CHECK-NEXT: cvttss2si %xmm0, %ecx -; CHECK-NEXT: cmovpl %eax, %ecx -; CHECK-NEXT: movd %ecx, %xmm0 -; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm4[0] -; CHECK-NEXT: movdqa %xmm1, %xmm0 +; CHECK-NEXT: maxps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK-NEXT: xorps %xmm2, %xmm2 +; CHECK-NEXT: minps %xmm1, %xmm2 +; CHECK-NEXT: cvttps2dq %xmm2, %xmm1 +; CHECK-NEXT: cmpunordps %xmm0, %xmm0 +; CHECK-NEXT: andnps %xmm1, %xmm0 ; CHECK-NEXT: retq %x = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f32(<4 x float> %f) ret <4 x i1> %x @@ -139,42 +109,15 @@ define <4 x i32> @test_signed_v4i32_v4f32(<4 x float> %f) nounwind { ; CHECK-LABEL: test_signed_v4i32_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: movaps %xmm0, %xmm1 -; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3] -; CHECK-NEXT: cvttss2si %xmm1, %edx -; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; CHECK-NEXT: ucomiss %xmm2, %xmm1 -; CHECK-NEXT: movl $2147483647, %eax # imm = 
0x7FFFFFFF -; CHECK-NEXT: cmoval %eax, %edx -; CHECK-NEXT: xorl %ecx, %ecx -; CHECK-NEXT: ucomiss %xmm1, %xmm1 -; CHECK-NEXT: cmovpl %ecx, %edx -; CHECK-NEXT: movd %edx, %xmm1 -; CHECK-NEXT: movaps %xmm0, %xmm3 -; CHECK-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1] -; CHECK-NEXT: cvttss2si %xmm3, %edx -; CHECK-NEXT: ucomiss %xmm2, %xmm3 -; CHECK-NEXT: cmoval %eax, %edx -; CHECK-NEXT: ucomiss %xmm3, %xmm3 -; CHECK-NEXT: cmovpl %ecx, %edx -; CHECK-NEXT: movd %edx, %xmm3 -; CHECK-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] -; CHECK-NEXT: cvttss2si %xmm0, %edx -; CHECK-NEXT: ucomiss %xmm2, %xmm0 -; CHECK-NEXT: cmoval %eax, %edx -; CHECK-NEXT: ucomiss %xmm0, %xmm0 -; CHECK-NEXT: cmovpl %ecx, %edx -; CHECK-NEXT: movd %edx, %xmm1 -; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] -; CHECK-NEXT: cvttss2si %xmm0, %edx -; CHECK-NEXT: ucomiss %xmm2, %xmm0 -; CHECK-NEXT: cmoval %eax, %edx -; CHECK-NEXT: ucomiss %xmm0, %xmm0 -; CHECK-NEXT: cmovpl %ecx, %edx -; CHECK-NEXT: movd %edx, %xmm0 -; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] -; CHECK-NEXT: movdqa %xmm1, %xmm0 +; CHECK-NEXT: movaps {{.*#+}} xmm1 = [2.14748352E+9,2.14748352E+9,2.14748352E+9,2.14748352E+9] +; CHECK-NEXT: cmpltps %xmm0, %xmm1 +; CHECK-NEXT: cvttps2dq %xmm0, %xmm2 +; CHECK-NEXT: movaps %xmm1, %xmm3 +; CHECK-NEXT: andnps %xmm2, %xmm3 +; CHECK-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK-NEXT: orps %xmm3, %xmm1 +; CHECK-NEXT: cmpunordps %xmm0, %xmm0 +; CHECK-NEXT: andnps %xmm1, %xmm0 ; CHECK-NEXT: retq %x = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %f) ret <4 x i32> %x diff --git a/llvm/test/CodeGen/X86/fptoui-sat-vector-128.ll b/llvm/test/CodeGen/X86/fptoui-sat-vector-128.ll --- a/llvm/test/CodeGen/X86/fptoui-sat-vector-128.ll +++ b/llvm/test/CodeGen/X86/fptoui-sat-vector-128.ll @@ -15,34 +15,10 @@ define <4 x i1> @test_unsigned_v4i1_v4f32(<4 x float> %f) nounwind { ; 
CHECK-LABEL: test_unsigned_v4i1_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: movaps %xmm0, %xmm1 -; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3] -; CHECK-NEXT: xorps %xmm2, %xmm2 -; CHECK-NEXT: maxss %xmm2, %xmm1 -; CHECK-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero -; CHECK-NEXT: minss %xmm3, %xmm1 -; CHECK-NEXT: cvttss2si %xmm1, %eax -; CHECK-NEXT: movd %eax, %xmm1 -; CHECK-NEXT: movaps %xmm0, %xmm4 -; CHECK-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1] -; CHECK-NEXT: maxss %xmm2, %xmm4 -; CHECK-NEXT: minss %xmm3, %xmm4 -; CHECK-NEXT: cvttss2si %xmm4, %eax -; CHECK-NEXT: movd %eax, %xmm4 -; CHECK-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] -; CHECK-NEXT: movaps %xmm0, %xmm1 -; CHECK-NEXT: maxss %xmm2, %xmm1 -; CHECK-NEXT: minss %xmm3, %xmm1 -; CHECK-NEXT: cvttss2si %xmm1, %eax -; CHECK-NEXT: movd %eax, %xmm1 -; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] -; CHECK-NEXT: maxss %xmm2, %xmm0 -; CHECK-NEXT: minss %xmm3, %xmm0 -; CHECK-NEXT: cvttss2si %xmm0, %eax -; CHECK-NEXT: movd %eax, %xmm0 -; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm4[0] -; CHECK-NEXT: movdqa %xmm1, %xmm0 +; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: maxps %xmm1, %xmm0 +; CHECK-NEXT: minps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-NEXT: cvttps2dq %xmm0, %xmm0 ; CHECK-NEXT: retq %x = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f32(<4 x float> %f) ret <4 x i1> %x @@ -130,43 +106,29 @@ define <4 x i32> @test_unsigned_v4i32_v4f32(<4 x float> %f) nounwind { ; CHECK-LABEL: test_unsigned_v4i32_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: movaps %xmm0, %xmm1 -; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3] -; CHECK-NEXT: cvttss2si %xmm1, %rdx -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: xorps %xmm2, %xmm2 -; CHECK-NEXT: ucomiss %xmm2, %xmm1 -; CHECK-NEXT: cmovbl %eax, %edx -; CHECK-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero -; CHECK-NEXT: ucomiss %xmm3, %xmm1 -; CHECK-NEXT: 
movl $-1, %ecx -; CHECK-NEXT: cmoval %ecx, %edx -; CHECK-NEXT: movd %edx, %xmm1 -; CHECK-NEXT: movaps %xmm0, %xmm4 -; CHECK-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1] -; CHECK-NEXT: cvttss2si %xmm4, %rdx -; CHECK-NEXT: ucomiss %xmm2, %xmm4 -; CHECK-NEXT: cmovbl %eax, %edx -; CHECK-NEXT: ucomiss %xmm3, %xmm4 -; CHECK-NEXT: cmoval %ecx, %edx -; CHECK-NEXT: movd %edx, %xmm4 -; CHECK-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] -; CHECK-NEXT: cvttss2si %xmm0, %rdx -; CHECK-NEXT: ucomiss %xmm2, %xmm0 -; CHECK-NEXT: cmovbl %eax, %edx -; CHECK-NEXT: ucomiss %xmm3, %xmm0 -; CHECK-NEXT: cmoval %ecx, %edx -; CHECK-NEXT: movd %edx, %xmm1 -; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] -; CHECK-NEXT: cvttss2si %xmm0, %rdx -; CHECK-NEXT: ucomiss %xmm2, %xmm0 -; CHECK-NEXT: cmovbl %eax, %edx -; CHECK-NEXT: ucomiss %xmm3, %xmm0 -; CHECK-NEXT: cmoval %ecx, %edx -; CHECK-NEXT: movd %edx, %xmm0 -; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm4[0] -; CHECK-NEXT: movdqa %xmm1, %xmm0 +; CHECK-NEXT: movaps %xmm0, %xmm2 +; CHECK-NEXT: cvttss2si %xmm0, %rax +; CHECK-NEXT: movaps %xmm0, %xmm3 +; CHECK-NEXT: xorps %xmm4, %xmm4 +; CHECK-NEXT: cmpnleps %xmm0, %xmm4 +; CHECK-NEXT: movaps {{.*#+}} xmm1 = [4.29496704E+9,4.29496704E+9,4.29496704E+9,4.29496704E+9] +; CHECK-NEXT: cmpltps %xmm0, %xmm1 +; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; CHECK-NEXT: cvttss2si %xmm0, %rcx +; CHECK-NEXT: movd %ecx, %xmm0 +; CHECK-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1] +; CHECK-NEXT: cvttss2si %xmm2, %rcx +; CHECK-NEXT: movd %ecx, %xmm2 +; CHECK-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] +; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,1,1] +; CHECK-NEXT: cvttss2si %xmm3, %rax +; CHECK-NEXT: movd %eax, %xmm3 +; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; 
CHECK-NEXT: andnps %xmm0, %xmm4 +; CHECK-NEXT: orps %xmm4, %xmm1 +; CHECK-NEXT: movaps %xmm1, %xmm0 ; CHECK-NEXT: retq %x = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %f) ret <4 x i32> %x