Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -4148,23 +4148,25 @@ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT); // Expand based on maximum range of FP_TO_SINT: - // True = fp_to_sint(Src) - // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000) - // Result = select (Src < 0x8000000000000000), True, False + // Sel = Src < 0x8000000000000000 + // Val = select Sel, Src, Src - 0x8000000000000000 + // Ofs = select Sel, 0, 0x8000000000000000 + // Result = fp_to_sint(Val) ^ Ofs APFloat apf(DAG.EVTToAPFloatSemantics(SrcVT), APInt::getNullValue(SrcVT.getScalarSizeInBits())); APInt x = APInt::getSignMask(DstVT.getScalarSizeInBits()); (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven); - SDValue Tmp1 = DAG.getConstantFP(apf, dl, SrcVT); - SDValue Tmp2 = DAG.getSetCC(dl, SetCCVT, Src, Tmp1, ISD::SETLT); - SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src); + SDValue Cst = DAG.getConstantFP(apf, dl, SrcVT); + SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT); + // TODO: Should any fast-math-flags be set for the FSUB? - SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, - DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Tmp1)); - False = - DAG.getNode(ISD::XOR, dl, DstVT, False, DAG.getConstant(x, dl, DstVT)); - Result = DAG.getSelect(dl, DstVT, Tmp2, True, False); + SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src, + DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst)); + SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), + DAG.getConstant(x, dl, DstVT)); + Result = DAG.getNode(ISD::XOR, dl, DstVT, + DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val), Ofs); return true; } Index: test/CodeGen/Mips/2008-07-07-Float2Int.ll =================================================================== --- test/CodeGen/Mips/2008-07-07-Float2Int.ll +++ test/CodeGen/Mips/2008-07-07-Float2Int.ll @@ -17,16 +17,15 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lui $1, %hi($CPI1_0) ; CHECK-NEXT: lwc1 $f0, %lo($CPI1_0)($1) -; CHECK-NEXT: sub.s $f1, $f12, $f0 -; CHECK-NEXT: trunc.w.s $f1, $f1 -; CHECK-NEXT: mfc1 $1, $f1 -; CHECK-NEXT: lui $2, 32768 -; CHECK-NEXT: xor $2, $1, $2 -; CHECK-NEXT: trunc.w.s $f1, $f12 -; CHECK-NEXT: mfc1 $1, $f1 +; CHECK-NEXT: lui $1, 32768 ; CHECK-NEXT: c.olt.s $f12, $f0 +; CHECK-NEXT: movt $1, $zero, $fcc0 +; CHECK-NEXT: sub.s $f0, $f12, $f0 +; CHECK-NEXT: movt.s $f0, $f12, $fcc0 +; CHECK-NEXT: trunc.w.s $f0, $f0 +; CHECK-NEXT: mfc1 $2, $f0 ; CHECK-NEXT: jr $ra -; CHECK-NEXT: movt $2, $1, $fcc0 +; CHECK-NEXT: xor $2, $2, $1 entry: fptoui float %a to i32 ; :0 [#uses=1] ret i32 %0 Index: test/CodeGen/Mips/msa/f16-llvm-ir.ll =================================================================== --- test/CodeGen/Mips/msa/f16-llvm-ir.ll +++ test/CodeGen/Mips/msa/f16-llvm-ir.ll @@ -258,44 +258,47 @@ ; MIPS32-O32-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32-O32-NEXT: addu $1, $2, $25 ; MIPS32-O32-NEXT: lw $2, %got(h)($1) -; MIPS32-O32-NEXT: lw $3, %got($CPI3_0)($1) -; MIPS32-O32-NEXT: lwc1 $f0, %lo($CPI3_0)($3) +; MIPS32-O32-NEXT: lh $2, 0($2) +; MIPS32-O32-NEXT: fill.h $w0, $2 +; MIPS32-O32-NEXT: fexupr.w $w0, $w0 +; MIPS32-O32-NEXT: copy_s.w $2, $w0[0] +; MIPS32-O32-NEXT: mtc1 $2, $f0 +; MIPS32-O32-NEXT: lw $2, %got($CPI3_0)($1) +; MIPS32-O32-NEXT: addiu $2, $2, %lo($CPI3_0) ; MIPS32-O32-NEXT: lh $2, 0($2) ; MIPS32-O32-NEXT: fill.h $w1, $2 ; MIPS32-O32-NEXT: fexupr.w $w1, $w1 ; MIPS32-O32-NEXT: copy_s.w $2, $w1[0] -; MIPS32-O32-NEXT: mtc1 $2, $f2 -; MIPS32-O32-NEXT: sub.s $f0, $f2, $f0 -; MIPS32-O32-NEXT: mfc1 $2, $f0 -; MIPS32-O32-NEXT: fill.w $w0, $2 +; MIPS32-O32-NEXT: mtc1 $2, $f1 +; MIPS32-O32-NEXT: lui $2, 32768 +; MIPS32-O32-NEXT: c.olt.s $f0, $f1 +; MIPS32-O32-NEXT: movt $2, $zero, $fcc0 +; MIPS32-O32-NEXT: addiu $3, $zero, 1 +; MIPS32-O32-NEXT: lw $1, %got($CPI3_1)($1) +; MIPS32-O32-NEXT: lwc1 $f1, %lo($CPI3_1)($1) +; MIPS32-O32-NEXT: c.olt.s $f0, $f1 +; MIPS32-O32-NEXT: movf $3, $zero, $fcc0 +; MIPS32-O32-NEXT: sub.s $f1, $f0, $f1 +; MIPS32-O32-NEXT: mfc1 $1, $f1 +; MIPS32-O32-NEXT: fill.w $w1, $1 +; MIPS32-O32-NEXT: fexdo.h $w1, $w1, $w1 +; MIPS32-O32-NEXT: fexupr.w $w1, $w1 +; MIPS32-O32-NEXT: copy_s.w $1, $w1[0] +; MIPS32-O32-NEXT: mtc1 $1, $f1 +; MIPS32-O32-NEXT: movn.s $f1, $f0, $3 +; MIPS32-O32-NEXT: mfc1 $1, $f1 +; MIPS32-O32-NEXT: fill.w $w0, $1 ; MIPS32-O32-NEXT: fexdo.h $w0, $w0, $w0 ; MIPS32-O32-NEXT: fexupr.w $w0, $w0 ; MIPS32-O32-NEXT: fexupr.d $w0, $w0 -; MIPS32-O32-NEXT: copy_s.w $2, $w0[0] -; MIPS32-O32-NEXT: mtc1 $2, $f3 -; MIPS32-O32-NEXT: copy_s.w $2, $w0[1] -; MIPS32-O32-NEXT: mthc1 $2, $f3 -; MIPS32-O32-NEXT: trunc.w.d $f0, $f3 -; MIPS32-O32-NEXT: mfc1 $2, $f0 -; MIPS32-O32-NEXT: fexupr.d $w0, $w1 -; MIPS32-O32-NEXT: copy_s.w $3, $w0[0] -; MIPS32-O32-NEXT: mtc1 $3, $f1 -; MIPS32-O32-NEXT: copy_s.w $3, $w0[1] -; MIPS32-O32-NEXT: mthc1 $3, $f1 -; MIPS32-O32-NEXT: trunc.w.d $f0, $f1 -; MIPS32-O32-NEXT: mfc1 $3, $f0 -; MIPS32-O32-NEXT: lw $1, %got($CPI3_1)($1) -; MIPS32-O32-NEXT: addiu $1, $1, %lo($CPI3_1) -; MIPS32-O32-NEXT: lui $4, 32768 -; MIPS32-O32-NEXT: xor $2, $2, $4 -; MIPS32-O32-NEXT: lh $1, 0($1) -; MIPS32-O32-NEXT: fill.h $w0, $1 -; MIPS32-O32-NEXT: fexupr.w $w0, $w0 ; MIPS32-O32-NEXT: copy_s.w $1, $w0[0] -; MIPS32-O32-NEXT: mtc1 $1, $f0 -; MIPS32-O32-NEXT: c.olt.s $f2, $f0 +; MIPS32-O32-NEXT: mtc1 $1, $f1 +; MIPS32-O32-NEXT: copy_s.w $1, $w0[1] +; MIPS32-O32-NEXT: mthc1 $1, $f1 +; MIPS32-O32-NEXT: trunc.w.d $f0, $f1 +; MIPS32-O32-NEXT: mfc1 $1, $f0 ; MIPS32-O32-NEXT: jr $ra -; MIPS32-O32-NEXT: movt $2, $3, $fcc0 +; MIPS32-O32-NEXT: xor $2, $1, $2 ; ; MIPS64R5-N32-LABEL: ffptoui: ; MIPS64R5-N32: # %bb.0: # %entry @@ -303,40 +306,45 @@ ; MIPS64R5-N32-NEXT: addu $1, $1, $25 ; MIPS64R5-N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(ffptoui))) ; MIPS64R5-N32-NEXT: lw $2, %got_disp(h)($1) -; MIPS64R5-N32-NEXT: lw $3, %got_page(.LCPI3_0)($1) -; MIPS64R5-N32-NEXT: lwc1 $f0, %got_ofst(.LCPI3_0)($3) +; MIPS64R5-N32-NEXT: lh $2, 0($2) +; MIPS64R5-N32-NEXT: fill.h $w0, $2 +; MIPS64R5-N32-NEXT: fexupr.w $w0, $w0 +; MIPS64R5-N32-NEXT: copy_s.w $2, $w0[0] +; MIPS64R5-N32-NEXT: mtc1 $2, $f0 +; MIPS64R5-N32-NEXT: lw $2, %got_page(.LCPI3_0)($1) +; MIPS64R5-N32-NEXT: addiu $2, $2, %got_ofst(.LCPI3_0) ; MIPS64R5-N32-NEXT: lh $2, 0($2) ; MIPS64R5-N32-NEXT: fill.h $w1, $2 ; MIPS64R5-N32-NEXT: fexupr.w $w1, $w1 ; MIPS64R5-N32-NEXT: copy_s.w $2, $w1[0] -; MIPS64R5-N32-NEXT: mtc1 $2, $f2 -; MIPS64R5-N32-NEXT: sub.s $f0, $f2, $f0 -; MIPS64R5-N32-NEXT: mfc1 $2, $f0 -; MIPS64R5-N32-NEXT: fill.w $w0, $2 +; MIPS64R5-N32-NEXT: mtc1 $2, $f1 +; MIPS64R5-N32-NEXT: lui $2, 32768 +; MIPS64R5-N32-NEXT: c.olt.s $f0, $f1 +; MIPS64R5-N32-NEXT: movt $2, $zero, $fcc0 +; MIPS64R5-N32-NEXT: addiu $3, $zero, 1 +; MIPS64R5-N32-NEXT: lw $1, %got_page(.LCPI3_1)($1) +; MIPS64R5-N32-NEXT: lwc1 $f1, %got_ofst(.LCPI3_1)($1) +; MIPS64R5-N32-NEXT: c.olt.s $f0, $f1 +; MIPS64R5-N32-NEXT: movf $3, $zero, $fcc0 +; MIPS64R5-N32-NEXT: sub.s $f1, $f0, $f1 +; MIPS64R5-N32-NEXT: mfc1 $1, $f1 +; MIPS64R5-N32-NEXT: fill.w $w1, $1 +; MIPS64R5-N32-NEXT: fexdo.h $w1, $w1, $w1 +; MIPS64R5-N32-NEXT: fexupr.w $w1, $w1 +; MIPS64R5-N32-NEXT: copy_s.w $1, $w1[0] +; MIPS64R5-N32-NEXT: mtc1 $1, $f1 +; MIPS64R5-N32-NEXT: movn.s $f1, $f0, $3 +; MIPS64R5-N32-NEXT: mfc1 $1, $f1 +; MIPS64R5-N32-NEXT: fill.w $w0, $1 ; MIPS64R5-N32-NEXT: fexdo.h $w0, $w0, $w0 ; MIPS64R5-N32-NEXT: fexupr.w $w0, $w0 ; MIPS64R5-N32-NEXT: fexupr.d $w0, $w0 -; MIPS64R5-N32-NEXT: copy_s.d $2, $w0[0] -; MIPS64R5-N32-NEXT: dmtc1 $2, $f0 +; MIPS64R5-N32-NEXT: copy_s.d $1, $w0[0] +; MIPS64R5-N32-NEXT: dmtc1 $1, $f0 ; MIPS64R5-N32-NEXT: trunc.w.d $f0, $f0 -; MIPS64R5-N32-NEXT: mfc1 $2, $f0 -; MIPS64R5-N32-NEXT: fexupr.d $w0, $w1 -; MIPS64R5-N32-NEXT: copy_s.d $3, $w0[0] -; MIPS64R5-N32-NEXT: dmtc1 $3, $f0 -; MIPS64R5-N32-NEXT: trunc.w.d $f0, $f0 -; MIPS64R5-N32-NEXT: mfc1 $3, $f0 -; MIPS64R5-N32-NEXT: lw $1, %got_page(.LCPI3_1)($1) -; MIPS64R5-N32-NEXT: addiu $1, $1, %got_ofst(.LCPI3_1) -; MIPS64R5-N32-NEXT: lui $4, 32768 -; MIPS64R5-N32-NEXT: xor $2, $2, $4 -; MIPS64R5-N32-NEXT: lh $1, 0($1) -; MIPS64R5-N32-NEXT: fill.h $w0, $1 -; MIPS64R5-N32-NEXT: fexupr.w $w0, $w0 -; MIPS64R5-N32-NEXT: copy_s.w $1, $w0[0] -; MIPS64R5-N32-NEXT: mtc1 $1, $f0 -; MIPS64R5-N32-NEXT: c.olt.s $f2, $f0 +; MIPS64R5-N32-NEXT: mfc1 $1, $f0 ; MIPS64R5-N32-NEXT: jr $ra -; MIPS64R5-N32-NEXT: movt $2, $3, $fcc0 +; MIPS64R5-N32-NEXT: xor $2, $1, $2 ; ; MIPS64R5-N64-LABEL: ffptoui: ; MIPS64R5-N64: # %bb.0: # %entry @@ -344,40 +352,45 @@ ; MIPS64R5-N64-NEXT: daddu $1, $1, $25 ; MIPS64R5-N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(ffptoui))) ; MIPS64R5-N64-NEXT: ld $2, %got_disp(h)($1) -; MIPS64R5-N64-NEXT: ld $3, %got_page(.LCPI3_0)($1) -; MIPS64R5-N64-NEXT: lwc1 $f0, %got_ofst(.LCPI3_0)($3) +; MIPS64R5-N64-NEXT: lh $2, 0($2) +; MIPS64R5-N64-NEXT: fill.h $w0, $2 +; MIPS64R5-N64-NEXT: fexupr.w $w0, $w0 +; MIPS64R5-N64-NEXT: copy_s.w $2, $w0[0] +; MIPS64R5-N64-NEXT: mtc1 $2, $f0 +; MIPS64R5-N64-NEXT: ld $2, %got_page(.LCPI3_0)($1) +; MIPS64R5-N64-NEXT: daddiu $2, $2, %got_ofst(.LCPI3_0) ; MIPS64R5-N64-NEXT: lh $2, 0($2) ; MIPS64R5-N64-NEXT: fill.h $w1, $2 ; MIPS64R5-N64-NEXT: fexupr.w $w1, $w1 ; MIPS64R5-N64-NEXT: copy_s.w $2, $w1[0] -; MIPS64R5-N64-NEXT: mtc1 $2, $f2 -; MIPS64R5-N64-NEXT: sub.s $f0, $f2, $f0 -; MIPS64R5-N64-NEXT: mfc1 $2, $f0 -; MIPS64R5-N64-NEXT: fill.w $w0, $2 +; MIPS64R5-N64-NEXT: mtc1 $2, $f1 +; MIPS64R5-N64-NEXT: lui $2, 32768 +; MIPS64R5-N64-NEXT: c.olt.s $f0, $f1 +; MIPS64R5-N64-NEXT: movt $2, $zero, $fcc0 +; MIPS64R5-N64-NEXT: addiu $3, $zero, 1 +; MIPS64R5-N64-NEXT: ld $1, %got_page(.LCPI3_1)($1) +; MIPS64R5-N64-NEXT: lwc1 $f1, %got_ofst(.LCPI3_1)($1) +; MIPS64R5-N64-NEXT: c.olt.s $f0, $f1 +; MIPS64R5-N64-NEXT: movf $3, $zero, $fcc0 +; MIPS64R5-N64-NEXT: sub.s $f1, $f0, $f1 +; MIPS64R5-N64-NEXT: mfc1 $1, $f1 +; MIPS64R5-N64-NEXT: fill.w $w1, $1 +; MIPS64R5-N64-NEXT: fexdo.h $w1, $w1, $w1 +; MIPS64R5-N64-NEXT: fexupr.w $w1, $w1 +; MIPS64R5-N64-NEXT: copy_s.w $1, $w1[0] +; MIPS64R5-N64-NEXT: mtc1 $1, $f1 +; MIPS64R5-N64-NEXT: movn.s $f1, $f0, $3 +; MIPS64R5-N64-NEXT: mfc1 $1, $f1 +; MIPS64R5-N64-NEXT: fill.w $w0, $1 ; MIPS64R5-N64-NEXT: fexdo.h $w0, $w0, $w0 ; MIPS64R5-N64-NEXT: fexupr.w $w0, $w0 ; MIPS64R5-N64-NEXT: fexupr.d $w0, $w0 -; MIPS64R5-N64-NEXT: copy_s.d $2, $w0[0] -; MIPS64R5-N64-NEXT: dmtc1 $2, $f0 +; MIPS64R5-N64-NEXT: copy_s.d $1, $w0[0] +; MIPS64R5-N64-NEXT: dmtc1 $1, $f0 ; MIPS64R5-N64-NEXT: trunc.w.d $f0, $f0 -; MIPS64R5-N64-NEXT: mfc1 $2, $f0 -; MIPS64R5-N64-NEXT: fexupr.d $w0, $w1 -; MIPS64R5-N64-NEXT: copy_s.d $3, $w0[0] -; MIPS64R5-N64-NEXT: dmtc1 $3, $f0 -; MIPS64R5-N64-NEXT: trunc.w.d $f0, $f0 -; MIPS64R5-N64-NEXT: mfc1 $3, $f0 -; MIPS64R5-N64-NEXT: ld $1, %got_page(.LCPI3_1)($1) -; MIPS64R5-N64-NEXT: daddiu $1, $1, %got_ofst(.LCPI3_1) -; MIPS64R5-N64-NEXT: lui $4, 32768 -; MIPS64R5-N64-NEXT: xor $2, $2, $4 -; MIPS64R5-N64-NEXT: lh $1, 0($1) -; MIPS64R5-N64-NEXT: fill.h $w0, $1 -; MIPS64R5-N64-NEXT: fexupr.w $w0, $w0 -; MIPS64R5-N64-NEXT: copy_s.w $1, $w0[0] -; MIPS64R5-N64-NEXT: mtc1 $1, $f0 -; MIPS64R5-N64-NEXT: c.olt.s $f2, $f0 +; MIPS64R5-N64-NEXT: mfc1 $1, $f0 ; MIPS64R5-N64-NEXT: jr $ra -; MIPS64R5-N64-NEXT: movt $2, $3, $fcc0 +; MIPS64R5-N64-NEXT: xor $2, $1, $2 ; ; MIPSR6-O32-LABEL: ffptoui: ; MIPSR6-O32: # %bb.0: # %entry @@ -385,40 +398,39 @@ ; MIPSR6-O32-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPSR6-O32-NEXT: addu $1, $2, $25 ; MIPSR6-O32-NEXT: lw $2, %got(h)($1) +; MIPSR6-O32-NEXT: lh $2, 0($2) +; MIPSR6-O32-NEXT: fill.h $w0, $2 +; MIPSR6-O32-NEXT: fexupr.w $w0, $w0 +; MIPSR6-O32-NEXT: copy_s.w $2, $w0[0] +; MIPSR6-O32-NEXT: mtc1 $2, $f0 ; MIPSR6-O32-NEXT: lw $1, %got($CPI3_0)($1) -; MIPSR6-O32-NEXT: lwc1 $f0, %lo($CPI3_0)($1) -; MIPSR6-O32-NEXT: lh $1, 0($2) -; MIPSR6-O32-NEXT: fill.h $w1, $1 +; MIPSR6-O32-NEXT: lwc1 $f1, %lo($CPI3_0)($1) +; MIPSR6-O32-NEXT: cmp.lt.s $f2, $f0, $f1 +; MIPSR6-O32-NEXT: sub.s $f1, $f0, $f1 +; MIPSR6-O32-NEXT: mfc1 $1, $f1 +; MIPSR6-O32-NEXT: fill.w $w1, $1 +; MIPSR6-O32-NEXT: fexdo.h $w1, $w1, $w1 ; MIPSR6-O32-NEXT: fexupr.w $w1, $w1 ; MIPSR6-O32-NEXT: copy_s.w $1, $w1[0] -; MIPSR6-O32-NEXT: mtc1 $1, $f2 -; MIPSR6-O32-NEXT: cmp.lt.s $f3, $f2, $f0 -; MIPSR6-O32-NEXT: sub.s $f0, $f2, $f0 -; MIPSR6-O32-NEXT: mfc1 $1, $f0 +; MIPSR6-O32-NEXT: mtc1 $1, $f1 +; MIPSR6-O32-NEXT: mov.s $f3, $f2 +; MIPSR6-O32-NEXT: sel.s $f3, $f1, $f0 +; MIPSR6-O32-NEXT: mfc1 $1, $f3 ; MIPSR6-O32-NEXT: fill.w $w0, $1 ; MIPSR6-O32-NEXT: fexdo.h $w0, $w0, $w0 ; MIPSR6-O32-NEXT: fexupr.w $w0, $w0 ; MIPSR6-O32-NEXT: fexupr.d $w0, $w0 ; MIPSR6-O32-NEXT: copy_s.w $1, $w0[0] -; MIPSR6-O32-NEXT: mtc1 $1, $f2 +; MIPSR6-O32-NEXT: mtc1 $1, $f1 ; MIPSR6-O32-NEXT: copy_s.w $1, $w0[1] -; MIPSR6-O32-NEXT: mthc1 $1, $f2 -; MIPSR6-O32-NEXT: trunc.w.d $f0, $f2 -; MIPSR6-O32-NEXT: mfc1 $1, $f0 -; MIPSR6-O32-NEXT: fexupr.d $w0, $w1 -; MIPSR6-O32-NEXT: copy_s.w $2, $w0[0] -; MIPSR6-O32-NEXT: mtc1 $2, $f1 -; MIPSR6-O32-NEXT: copy_s.w $2, $w0[1] -; MIPSR6-O32-NEXT: mthc1 $2, $f1 +; MIPSR6-O32-NEXT: mthc1 $1, $f1 ; MIPSR6-O32-NEXT: trunc.w.d $f0, $f1 -; MIPSR6-O32-NEXT: mfc1 $2, $f0 -; MIPSR6-O32-NEXT: lui $3, 32768 -; MIPSR6-O32-NEXT: xor $1, $1, $3 -; MIPSR6-O32-NEXT: mfc1 $3, $f3 -; MIPSR6-O32-NEXT: seleqz $1, $1, $3 -; MIPSR6-O32-NEXT: selnez $2, $2, $3 +; MIPSR6-O32-NEXT: mfc1 $1, $f0 +; MIPSR6-O32-NEXT: lui $2, 32768 +; MIPSR6-O32-NEXT: mfc1 $3, $f2 +; MIPSR6-O32-NEXT: seleqz $2, $2, $3 ; MIPSR6-O32-NEXT: jr $ra -; MIPSR6-O32-NEXT: or $2, $2, $1 +; MIPSR6-O32-NEXT: xor $2, $1, $2 ; ; MIPSR6-N32-LABEL: ffptoui: ; MIPSR6-N32: # %bb.0: # %entry @@ -426,16 +438,24 @@ ; MIPSR6-N32-NEXT: addu $1, $1, $25 ; MIPSR6-N32-NEXT: addiu $1, $1, %lo(%neg(%gp_rel(ffptoui))) ; MIPSR6-N32-NEXT: lw $2, %got_disp(h)($1) +; MIPSR6-N32-NEXT: lh $2, 0($2) +; MIPSR6-N32-NEXT: fill.h $w0, $2 +; MIPSR6-N32-NEXT: fexupr.w $w0, $w0 +; MIPSR6-N32-NEXT: copy_s.w $2, $w0[0] +; MIPSR6-N32-NEXT: mtc1 $2, $f0 ; MIPSR6-N32-NEXT: lw $1, %got_page(.LCPI3_0)($1) -; MIPSR6-N32-NEXT: lwc1 $f0, %got_ofst(.LCPI3_0)($1) -; MIPSR6-N32-NEXT: lh $1, 0($2) -; MIPSR6-N32-NEXT: fill.h $w1, $1 +; MIPSR6-N32-NEXT: lwc1 $f1, %got_ofst(.LCPI3_0)($1) +; MIPSR6-N32-NEXT: cmp.lt.s $f2, $f0, $f1 +; MIPSR6-N32-NEXT: sub.s $f1, $f0, $f1 +; MIPSR6-N32-NEXT: mfc1 $1, $f1 +; MIPSR6-N32-NEXT: fill.w $w1, $1 +; MIPSR6-N32-NEXT: fexdo.h $w1, $w1, $w1 ; MIPSR6-N32-NEXT: fexupr.w $w1, $w1 ; MIPSR6-N32-NEXT: copy_s.w $1, $w1[0] -; MIPSR6-N32-NEXT: mtc1 $1, $f2 -; MIPSR6-N32-NEXT: cmp.lt.s $f3, $f2, $f0 -; MIPSR6-N32-NEXT: sub.s $f0, $f2, $f0 -; MIPSR6-N32-NEXT: mfc1 $1, $f0 +; MIPSR6-N32-NEXT: mtc1 $1, $f1 +; MIPSR6-N32-NEXT: mov.s $f3, $f2 +; MIPSR6-N32-NEXT: sel.s $f3, $f1, $f0 +; MIPSR6-N32-NEXT: mfc1 $1, $f3 ; MIPSR6-N32-NEXT: fill.w $w0, $1 ; MIPSR6-N32-NEXT: fexdo.h $w0, $w0, $w0 ; MIPSR6-N32-NEXT: fexupr.w $w0, $w0 @@ -444,18 +464,11 @@ ; MIPSR6-N32-NEXT: dmtc1 $1, $f0 ; MIPSR6-N32-NEXT: trunc.w.d $f0, $f0 ; MIPSR6-N32-NEXT: mfc1 $1, $f0 -; MIPSR6-N32-NEXT: fexupr.d $w0, $w1 -; MIPSR6-N32-NEXT: copy_s.d $2, $w0[0] -; MIPSR6-N32-NEXT: dmtc1 $2, $f0 -; MIPSR6-N32-NEXT: trunc.w.d $f0, $f0 -; MIPSR6-N32-NEXT: mfc1 $2, $f0 -; MIPSR6-N32-NEXT: lui $3, 32768 -; MIPSR6-N32-NEXT: xor $1, $1, $3 -; MIPSR6-N32-NEXT: mfc1 $3, $f3 -; MIPSR6-N32-NEXT: seleqz $1, $1, $3 -; MIPSR6-N32-NEXT: selnez $2, $2, $3 +; MIPSR6-N32-NEXT: lui $2, 32768 +; MIPSR6-N32-NEXT: mfc1 $3, $f2 +; MIPSR6-N32-NEXT: seleqz $2, $2, $3 ; MIPSR6-N32-NEXT: jr $ra -; MIPSR6-N32-NEXT: or $2, $2, $1 +; MIPSR6-N32-NEXT: xor $2, $1, $2 ; ; MIPSR6-N64-LABEL: ffptoui: ; MIPSR6-N64: # %bb.0: # %entry @@ -463,16 +476,24 @@ ; MIPSR6-N64-NEXT: daddu $1, $1, $25 ; MIPSR6-N64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(ffptoui))) ; MIPSR6-N64-NEXT: ld $2, %got_disp(h)($1) +; MIPSR6-N64-NEXT: lh $2, 0($2) +; MIPSR6-N64-NEXT: fill.h $w0, $2 +; MIPSR6-N64-NEXT: fexupr.w $w0, $w0 +; MIPSR6-N64-NEXT: copy_s.w $2, $w0[0] +; MIPSR6-N64-NEXT: mtc1 $2, $f0 ; MIPSR6-N64-NEXT: ld $1, %got_page(.LCPI3_0)($1) -; MIPSR6-N64-NEXT: lwc1 $f0, %got_ofst(.LCPI3_0)($1) -; MIPSR6-N64-NEXT: lh $1, 0($2) -; MIPSR6-N64-NEXT: fill.h $w1, $1 +; MIPSR6-N64-NEXT: lwc1 $f1, %got_ofst(.LCPI3_0)($1) +; MIPSR6-N64-NEXT: cmp.lt.s $f2, $f0, $f1 +; MIPSR6-N64-NEXT: sub.s $f1, $f0, $f1 +; MIPSR6-N64-NEXT: mfc1 $1, $f1 +; MIPSR6-N64-NEXT: fill.w $w1, $1 +; MIPSR6-N64-NEXT: fexdo.h $w1, $w1, $w1 ; MIPSR6-N64-NEXT: fexupr.w $w1, $w1 ; MIPSR6-N64-NEXT: copy_s.w $1, $w1[0] -; MIPSR6-N64-NEXT: mtc1 $1, $f2 -; MIPSR6-N64-NEXT: cmp.lt.s $f3, $f2, $f0 -; MIPSR6-N64-NEXT: sub.s $f0, $f2, $f0 -; MIPSR6-N64-NEXT: mfc1 $1, $f0 +; MIPSR6-N64-NEXT: mtc1 $1, $f1 +; MIPSR6-N64-NEXT: mov.s $f3, $f2 +; MIPSR6-N64-NEXT: sel.s $f3, $f1, $f0 +; MIPSR6-N64-NEXT: mfc1 $1, $f3 ; MIPSR6-N64-NEXT: fill.w $w0, $1 ; MIPSR6-N64-NEXT: fexdo.h $w0, $w0, $w0 ; MIPSR6-N64-NEXT: fexupr.w $w0, $w0 @@ -481,18 +502,11 @@ ; MIPSR6-N64-NEXT: dmtc1 $1, $f0 ; MIPSR6-N64-NEXT: trunc.w.d $f0, $f0 ; MIPSR6-N64-NEXT: mfc1 $1, $f0 -; MIPSR6-N64-NEXT: fexupr.d $w0, $w1 -; MIPSR6-N64-NEXT: copy_s.d $2, $w0[0] -; MIPSR6-N64-NEXT: dmtc1 $2, $f0 -; MIPSR6-N64-NEXT: trunc.w.d $f0, $f0 -; MIPSR6-N64-NEXT: mfc1 $2, $f0 -; MIPSR6-N64-NEXT: lui $3, 32768 -; MIPSR6-N64-NEXT: xor $1, $1, $3 -; MIPSR6-N64-NEXT: mfc1 $3, $f3 -; MIPSR6-N64-NEXT: seleqz $1, $1, $3 -; MIPSR6-N64-NEXT: selnez $2, $2, $3 +; MIPSR6-N64-NEXT: lui $2, 32768 +; MIPSR6-N64-NEXT: mfc1 $3, $f2 +; MIPSR6-N64-NEXT: seleqz $2, $2, $3 ; MIPSR6-N64-NEXT: jr $ra -; MIPSR6-N64-NEXT: or $2, $2, $1 +; MIPSR6-N64-NEXT: xor $2, $1, $2 entry: %0 = load half, half * @h, align 2 %1 = fptoui half %0 to i32 Index: test/CodeGen/SystemZ/fp-conv-10.ll =================================================================== --- test/CodeGen/SystemZ/fp-conv-10.ll +++ test/CodeGen/SystemZ/fp-conv-10.ll @@ -13,16 +13,21 @@ ; CHECK-LABEL: f1: ; CHECK: # %bb.0: ; CHECK-NEXT: larl %r1, .LCPI0_0 -; CHECK-NEXT: le %f1, 0(%r1) -; CHECK-NEXT: cebr %f0, %f1 -; CHECK-NEXT: jnl .LBB0_2 +; CHECK-NEXT: le %f2, 0(%r1) +; CHECK-NEXT: ler %f1, %f0 +; CHECK-NEXT: sebr %f1, %f2 +; CHECK-NEXT: cebr %f0, %f2 +; CHECK-NEXT: jl .LBB0_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: cfebr %r2, 5, %f0 -; CHECK-NEXT: br %r14 +; CHECK-NEXT: ler %f0, %f1 ; CHECK-NEXT: .LBB0_2: -; CHECK-NEXT: sebr %f0, %f1 +; CHECK-NEXT: lhi %r0, 0 +; CHECK-NEXT: jl .LBB0_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: llilh %r0, 32768 +; CHECK-NEXT: .LBB0_4: ; CHECK-NEXT: cfebr %r2, 5, %f0 -; CHECK-NEXT: xilf %r2, 2147483648 +; CHECK-NEXT: xr %r2, %r0 ; CHECK-NEXT: br %r14 %conv = fptoui float %f to i32 ret i32 %conv @@ -33,16 +38,21 @@ ; CHECK-LABEL: f2: ; CHECK: # %bb.0: ; CHECK-NEXT: larl %r1, .LCPI1_0 -; CHECK-NEXT: ldeb %f1, 0(%r1) -; CHECK-NEXT: cdbr %f0, %f1 -; CHECK-NEXT: jnl .LBB1_2 +; CHECK-NEXT: ldeb %f2, 0(%r1) +; CHECK-NEXT: ldr %f1, %f0 +; CHECK-NEXT: sdbr %f1, %f2 +; CHECK-NEXT: cdbr %f0, %f2 +; CHECK-NEXT: jl .LBB1_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: cfdbr %r2, 5, %f0 -; CHECK-NEXT: br %r14 +; CHECK-NEXT: ldr %f0, %f1 ; CHECK-NEXT: .LBB1_2: -; CHECK-NEXT: sdbr %f0, %f1 +; CHECK-NEXT: lhi %r0, 0 +; CHECK-NEXT: jl .LBB1_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: llilh %r0, 32768 +; CHECK-NEXT: .LBB1_4: ; CHECK-NEXT: cfdbr %r2, 5, %f0 -; CHECK-NEXT: xilf %r2, 2147483648 +; CHECK-NEXT: xr %r2, %r0 ; CHECK-NEXT: br %r14 %conv = fptoui double %f to i32 ret i32 %conv @@ -55,16 +65,21 @@ ; CHECK-NEXT: ld %f0, 0(%r2) ; CHECK-NEXT: ld %f2, 8(%r2) ; CHECK-NEXT: larl %r1, .LCPI2_0 -; CHECK-NEXT: lxeb %f1, 0(%r1) -; CHECK-NEXT: cxbr %f0, %f1 -; CHECK-NEXT: jnl .LBB2_2 +; CHECK-NEXT: lxeb %f4, 0(%r1) +; CHECK-NEXT: lxr %f1, %f0 +; CHECK-NEXT: sxbr %f1, %f4 +; CHECK-NEXT: cxbr %f0, %f4 +; CHECK-NEXT: jl .LBB2_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: cfxbr %r2, 5, %f0 -; CHECK-NEXT: br %r14 +; CHECK-NEXT: lxr %f0, %f1 ; CHECK-NEXT: .LBB2_2: -; CHECK-NEXT: sxbr %f0, %f1 +; CHECK-NEXT: lhi %r0, 0 +; CHECK-NEXT: jl .LBB2_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: llilh %r0, 32768 +; CHECK-NEXT: .LBB2_4: ; CHECK-NEXT: cfxbr %r2, 5, %f0 -; CHECK-NEXT: xilf %r2, 2147483648 +; CHECK-NEXT: xr %r2, %r0 ; CHECK-NEXT: br %r14 %f = load fp128, fp128 *%src %conv = fptoui fp128 %f to i32 Index: test/CodeGen/SystemZ/fp-conv-12.ll =================================================================== --- test/CodeGen/SystemZ/fp-conv-12.ll +++ test/CodeGen/SystemZ/fp-conv-12.ll @@ -12,16 +12,21 @@ ; CHECK-LABEL: f1: ; CHECK: # %bb.0: ; CHECK-NEXT: larl %r1, .LCPI0_0 -; CHECK-NEXT: le %f1, 0(%r1) -; CHECK-NEXT: cebr %f0, %f1 -; CHECK-NEXT: jnl .LBB0_2 +; CHECK-NEXT: le %f2, 0(%r1) +; CHECK-NEXT: ler %f1, %f0 +; CHECK-NEXT: sebr %f1, %f2 +; CHECK-NEXT: cebr %f0, %f2 +; CHECK-NEXT: jl .LBB0_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: cgebr %r2, 5, %f0 -; CHECK-NEXT: br %r14 +; CHECK-NEXT: ler %f0, %f1 ; CHECK-NEXT: .LBB0_2: -; CHECK-NEXT: sebr %f0, %f1 +; CHECK-NEXT: lghi %r0, 0 +; CHECK-NEXT: jl .LBB0_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: llihh %r0, 32768 +; CHECK-NEXT: .LBB0_4: ; CHECK-NEXT: cgebr %r2, 5, %f0 -; CHECK-NEXT: xihf %r2, 2147483648 +; CHECK-NEXT: xgr %r2, %r0 ; CHECK-NEXT: br %r14 %conv = fptoui float %f to i64 ret i64 %conv @@ -32,16 +37,21 @@ ; CHECK-LABEL: f2: ; CHECK: # %bb.0: ; CHECK-NEXT: larl %r1, .LCPI1_0 -; CHECK-NEXT: ldeb %f1, 0(%r1) -; CHECK-NEXT: cdbr %f0, %f1 -; CHECK-NEXT: jnl .LBB1_2 +; CHECK-NEXT: ldeb %f2, 0(%r1) +; CHECK-NEXT: ldr %f1, %f0 +; CHECK-NEXT: sdbr %f1, %f2 +; CHECK-NEXT: cdbr %f0, %f2 +; CHECK-NEXT: jl .LBB1_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: cgdbr %r2, 5, %f0 -; CHECK-NEXT: br %r14 +; CHECK-NEXT: ldr %f0, %f1 ; CHECK-NEXT: .LBB1_2: -; CHECK-NEXT: sdbr %f0, %f1 +; CHECK-NEXT: lghi %r0, 0 +; CHECK-NEXT: jl .LBB1_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: llihh %r0, 32768 +; CHECK-NEXT: .LBB1_4: ; CHECK-NEXT: cgdbr %r2, 5, %f0 -; CHECK-NEXT: xihf %r2, 2147483648 +; CHECK-NEXT: xgr %r2, %r0 ; CHECK-NEXT: br %r14 %conv = fptoui double %f to i64 ret i64 %conv @@ -54,16 +64,21 @@ ; CHECK-NEXT: ld %f0, 0(%r2) ; CHECK-NEXT: ld %f2, 8(%r2) ; CHECK-NEXT: larl %r1, .LCPI2_0 -; CHECK-NEXT: lxeb %f1, 0(%r1) -; CHECK-NEXT: cxbr %f0, %f1 -; CHECK-NEXT: jnl .LBB2_2 +; CHECK-NEXT: lxeb %f4, 0(%r1) +; CHECK-NEXT: lxr %f1, %f0 +; CHECK-NEXT: sxbr %f1, %f4 +; CHECK-NEXT: cxbr %f0, %f4 +; CHECK-NEXT: jl .LBB2_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: cgxbr %r2, 5, %f0 -; CHECK-NEXT: br %r14 +; CHECK-NEXT: lxr %f0, %f1 ; CHECK-NEXT: .LBB2_2: -; CHECK-NEXT: sxbr %f0, %f1 +; CHECK-NEXT: lghi %r0, 0 +; CHECK-NEXT: jl .LBB2_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: llihh %r0, 32768 +; CHECK-NEXT: .LBB2_4: ; CHECK-NEXT: cgxbr %r2, 5, %f0 -; CHECK-NEXT: xihf %r2, 2147483648 +; CHECK-NEXT: xgr %r2, %r0 ; CHECK-NEXT: br %r14 %f = load fp128, fp128 *%src %conv = fptoui fp128 %f to i64 Index: test/CodeGen/X86/fp-cvt.ll =================================================================== --- test/CodeGen/X86/fp-cvt.ll +++ test/CodeGen/X86/fp-cvt.ll @@ -483,29 +483,20 @@ ; X64-X87-NEXT: flds {{.*}}(%rip) ; X64-X87-NEXT: fld %st(1) ; X64-X87-NEXT: fsub %st(1) +; X64-X87-NEXT: xorl %eax, %eax +; X64-X87-NEXT: fxch %st(1) +; X64-X87-NEXT: fucompi %st(2) +; X64-X87-NEXT: fcmovnbe %st(1), %st(0) +; X64-X87-NEXT: fstp %st(1) ; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax -; X64-X87-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F -; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: movw %ax, -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: fistpll -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax +; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx ; X64-X87-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F ; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: movw %ax, -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: fld %st(1) +; X64-X87-NEXT: movw %cx, -{{[0-9]+}}(%rsp) ; X64-X87-NEXT: fistpll -{{[0-9]+}}(%rsp) ; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: fucompi %st(1) -; X64-X87-NEXT: fstp %st(0) -; X64-X87-NEXT: jbe .LBB10_1 -; X64-X87-NEXT: # %bb.2: -; X64-X87-NEXT: movq -{{[0-9]+}}(%rsp), %rax -; X64-X87-NEXT: retq -; X64-X87-NEXT: .LBB10_1: -; X64-X87-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; X64-X87-NEXT: setbe %al +; X64-X87-NEXT: shlq $63, %rax ; X64-X87-NEXT: xorq -{{[0-9]+}}(%rsp), %rax ; X64-X87-NEXT: retq ; @@ -515,17 +506,14 @@ ; X64-SSSE3-NEXT: flds {{.*}}(%rip) ; X64-SSSE3-NEXT: fld %st(1) ; X64-SSSE3-NEXT: fsub %st(1) +; X64-SSSE3-NEXT: xorl %eax, %eax +; X64-SSSE3-NEXT: fxch %st(1) +; X64-SSSE3-NEXT: fucompi %st(2) +; X64-SSSE3-NEXT: fcmovnbe %st(1), %st(0) +; X64-SSSE3-NEXT: fstp %st(1) ; X64-SSSE3-NEXT: fisttpll -{{[0-9]+}}(%rsp) -; X64-SSSE3-NEXT: fld %st(1) -; X64-SSSE3-NEXT: fisttpll -{{[0-9]+}}(%rsp) -; X64-SSSE3-NEXT: fucompi %st(1) -; X64-SSSE3-NEXT: fstp %st(0) -; X64-SSSE3-NEXT: jbe .LBB10_1 -; X64-SSSE3-NEXT: # %bb.2: -; X64-SSSE3-NEXT: movq -{{[0-9]+}}(%rsp), %rax -; X64-SSSE3-NEXT: retq -; X64-SSSE3-NEXT: .LBB10_1: -; X64-SSSE3-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; X64-SSSE3-NEXT: setbe %al +; X64-SSSE3-NEXT: shlq $63, %rax ; X64-SSSE3-NEXT: xorq -{{[0-9]+}}(%rsp), %rax ; X64-SSSE3-NEXT: retq %1 = fptoui x86_fp80 %a0 to i64 @@ -577,29 +565,20 @@ ; X64-X87-NEXT: flds {{.*}}(%rip) ; X64-X87-NEXT: fld %st(1) ; X64-X87-NEXT: fsub %st(1) +; X64-X87-NEXT: xorl %eax, %eax +; X64-X87-NEXT: fxch %st(1) +; X64-X87-NEXT: fucompi %st(2) +; X64-X87-NEXT: fcmovnbe %st(1), %st(0) +; X64-X87-NEXT: fstp %st(1) ; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax -; X64-X87-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F -; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: movw %ax, -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: fistpll -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax +; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx ; X64-X87-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F ; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: movw %ax, -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: fld %st(1) +; X64-X87-NEXT: movw %cx, -{{[0-9]+}}(%rsp) ; X64-X87-NEXT: fistpll -{{[0-9]+}}(%rsp) ; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp) -; X64-X87-NEXT: fucompi %st(1) -; X64-X87-NEXT: fstp %st(0) -; X64-X87-NEXT: jbe .LBB11_1 -; X64-X87-NEXT: # %bb.2: -; X64-X87-NEXT: movq -{{[0-9]+}}(%rsp), %rax -; X64-X87-NEXT: retq -; X64-X87-NEXT: .LBB11_1: -; X64-X87-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; X64-X87-NEXT: setbe %al +; X64-X87-NEXT: shlq $63, %rax ; X64-X87-NEXT: xorq -{{[0-9]+}}(%rsp), %rax ; X64-X87-NEXT: retq ; @@ -609,17 +588,14 @@ ; X64-SSSE3-NEXT: flds {{.*}}(%rip) ; X64-SSSE3-NEXT: fld %st(1) ; X64-SSSE3-NEXT: fsub %st(1) +; X64-SSSE3-NEXT: xorl %eax, %eax +; X64-SSSE3-NEXT: fxch %st(1) +; X64-SSSE3-NEXT: fucompi %st(2) +; X64-SSSE3-NEXT: fcmovnbe %st(1), %st(0) +; X64-SSSE3-NEXT: fstp %st(1) ; X64-SSSE3-NEXT: fisttpll -{{[0-9]+}}(%rsp) -; X64-SSSE3-NEXT: fld %st(1) -; X64-SSSE3-NEXT: fisttpll -{{[0-9]+}}(%rsp) -; X64-SSSE3-NEXT: fucompi %st(1) -; X64-SSSE3-NEXT: fstp %st(0) -; X64-SSSE3-NEXT: jbe .LBB11_1 -; X64-SSSE3-NEXT: # %bb.2: -; X64-SSSE3-NEXT: movq -{{[0-9]+}}(%rsp), %rax -; X64-SSSE3-NEXT: retq -; X64-SSSE3-NEXT: .LBB11_1: -; X64-SSSE3-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; X64-SSSE3-NEXT: setbe %al +; X64-SSSE3-NEXT: shlq $63, %rax ; X64-SSSE3-NEXT: xorq -{{[0-9]+}}(%rsp), %rax ; X64-SSSE3-NEXT: retq %1 = load x86_fp80, x86_fp80 *%a0 Index: test/CodeGen/X86/ftrunc.ll =================================================================== --- test/CodeGen/X86/ftrunc.ll +++ test/CodeGen/X86/ftrunc.ll @@ -31,13 +31,18 @@ ; SSE2: # %bb.0: ; SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; SSE2-NEXT: movapd %xmm0, %xmm2 -; SSE2-NEXT: subsd %xmm1, %xmm2 -; SSE2-NEXT: cvttsd2si %xmm2, %rax -; SSE2-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; SSE2-NEXT: xorq %rax, %rcx -; SSE2-NEXT: cvttsd2si %xmm0, %rax +; SSE2-NEXT: cmpltsd %xmm1, %xmm2 +; SSE2-NEXT: movapd %xmm2, %xmm3 +; SSE2-NEXT: andpd %xmm0, %xmm2 +; SSE2-NEXT: xorl %eax, %eax ; SSE2-NEXT: ucomisd %xmm1, %xmm0 -; SSE2-NEXT: cmovaeq %rcx, %rax +; SSE2-NEXT: subsd %xmm1, %xmm0 +; SSE2-NEXT: andnpd %xmm0, %xmm3 +; SSE2-NEXT: orpd %xmm3, %xmm2 +; SSE2-NEXT: cvttsd2si %xmm2, %rcx +; SSE2-NEXT: setae %al +; SSE2-NEXT: shlq $63, %rax +; SSE2-NEXT: xorq %rcx, %rax ; SSE2-NEXT: movq %rax, %xmm1 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] ; SSE2-NEXT: subpd {{.*}}(%rip), %xmm1 @@ -109,22 +114,34 @@ ; SSE2-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero ; SSE2-NEXT: movapd %xmm0, %xmm1 ; SSE2-NEXT: subsd %xmm2, %xmm1 -; SSE2-NEXT: cvttsd2si %xmm1, %rax -; SSE2-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; SSE2-NEXT: xorq %rcx, %rax -; SSE2-NEXT: cvttsd2si %xmm0, %rdx +; SSE2-NEXT: movapd %xmm0, %xmm3 +; SSE2-NEXT: cmpltsd %xmm2, %xmm3 +; SSE2-NEXT: movapd %xmm3, %xmm4 +; SSE2-NEXT: andnpd %xmm1, %xmm4 +; SSE2-NEXT: andpd %xmm0, %xmm3 +; SSE2-NEXT: orpd %xmm4, %xmm3 +; SSE2-NEXT: cvttsd2si %xmm3, %rax +; SSE2-NEXT: xorl %ecx, %ecx ; SSE2-NEXT: ucomisd %xmm2, %xmm0 -; SSE2-NEXT: cmovaeq %rax, %rdx -; SSE2-NEXT: movq %rdx, %xmm1 +; SSE2-NEXT: setae %cl +; SSE2-NEXT: shlq $63, %rcx +; SSE2-NEXT: xorq %rax, %rcx +; SSE2-NEXT: movq %rcx, %xmm1 ; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] ; SSE2-NEXT: movapd %xmm0, %xmm3 -; SSE2-NEXT: subsd %xmm2, %xmm3 -; SSE2-NEXT: cvttsd2si %xmm3, %rax -; SSE2-NEXT: xorq %rcx, %rax -; SSE2-NEXT: cvttsd2si %xmm0, %rcx +; SSE2-NEXT: cmpltsd %xmm2, %xmm3 +; SSE2-NEXT: movapd %xmm3, %xmm4 +; SSE2-NEXT: andpd %xmm0, %xmm3 +; SSE2-NEXT: xorl %eax, %eax ; SSE2-NEXT: ucomisd %xmm2, %xmm0 -; SSE2-NEXT: cmovaeq %rax, %rcx -; SSE2-NEXT: movq %rcx, %xmm0 +; SSE2-NEXT: subsd %xmm2, %xmm0 +; SSE2-NEXT: andnpd %xmm0, %xmm4 +; SSE2-NEXT: orpd %xmm4, %xmm3 +; SSE2-NEXT: cvttsd2si %xmm3, %rcx +; SSE2-NEXT: setae %al +; SSE2-NEXT: shlq $63, %rax +; SSE2-NEXT: xorq %rcx, %rax +; SSE2-NEXT: movq %rax, %xmm0 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [4294967295,4294967295] ; SSE2-NEXT: pand %xmm1, %xmm0 @@ -156,39 +173,64 @@ ; SSE2-NEXT: movapd %xmm1, %xmm2 ; SSE2-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero ; SSE2-NEXT: subsd %xmm3, %xmm1 -; SSE2-NEXT: cvttsd2si %xmm1, %rcx -; SSE2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; SSE2-NEXT: xorq %rax, %rcx -; SSE2-NEXT: cvttsd2si %xmm2, %rdx +; SSE2-NEXT: movapd %xmm2, %xmm4 +; SSE2-NEXT: cmpltsd %xmm3, %xmm4 +; SSE2-NEXT: movapd %xmm4, %xmm5 +; SSE2-NEXT: andnpd %xmm1, %xmm5 +; SSE2-NEXT: andpd %xmm2, %xmm4 +; SSE2-NEXT: orpd %xmm5, %xmm4 +; SSE2-NEXT: cvttsd2si %xmm4, %rax +; SSE2-NEXT: xorl %ecx, %ecx ; SSE2-NEXT: ucomisd %xmm3, %xmm2 -; SSE2-NEXT: cmovaeq %rcx, %rdx -; SSE2-NEXT: movq %rdx, %xmm1 +; SSE2-NEXT: setae %cl +; SSE2-NEXT: shlq $63, %rcx +; SSE2-NEXT: xorq %rax, %rcx +; SSE2-NEXT: movq %rcx, %xmm1 ; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] ; SSE2-NEXT: movapd %xmm2, %xmm4 -; SSE2-NEXT: subsd %xmm3, %xmm4 -; SSE2-NEXT: cvttsd2si %xmm4, %rcx -; SSE2-NEXT: xorq %rax, %rcx -; SSE2-NEXT: cvttsd2si %xmm2, %rdx +; SSE2-NEXT: cmpltsd %xmm3, %xmm4 +; SSE2-NEXT: movapd %xmm4, %xmm5 +; SSE2-NEXT: andpd %xmm2, %xmm4 +; SSE2-NEXT: xorl %eax, %eax ; SSE2-NEXT: ucomisd %xmm3, %xmm2 -; SSE2-NEXT: cmovaeq %rcx, %rdx -; SSE2-NEXT: movq %rdx, %xmm2 +; SSE2-NEXT: subsd %xmm3, %xmm2 +; SSE2-NEXT: andnpd %xmm2, %xmm5 +; SSE2-NEXT: orpd %xmm5, %xmm4 +; SSE2-NEXT: cvttsd2si %xmm4, %rcx +; SSE2-NEXT: setae %al +; SSE2-NEXT: shlq $63, %rax +; SSE2-NEXT: xorq %rcx, %rax +; SSE2-NEXT: movq %rax, %xmm2 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; SSE2-NEXT: movapd %xmm0, %xmm2 ; SSE2-NEXT: subsd %xmm3, %xmm2 -; SSE2-NEXT: cvttsd2si %xmm2, %rcx -; SSE2-NEXT: xorq %rax, %rcx -; SSE2-NEXT: cvttsd2si %xmm0, %rdx +; SSE2-NEXT: movapd %xmm0, %xmm4 +; SSE2-NEXT: cmpltsd %xmm3, %xmm4 +; SSE2-NEXT: movapd %xmm4, %xmm5 +; SSE2-NEXT: andnpd %xmm2, %xmm5 +; SSE2-NEXT: andpd %xmm0, %xmm4 +; SSE2-NEXT: orpd %xmm5, %xmm4 +; SSE2-NEXT: cvttsd2si %xmm4, %rax +; SSE2-NEXT: xorl %ecx, %ecx ; SSE2-NEXT: ucomisd %xmm3, %xmm0 -; SSE2-NEXT: cmovaeq %rcx, %rdx -; SSE2-NEXT: movq %rdx, %xmm2 +; SSE2-NEXT: setae %cl +; SSE2-NEXT: shlq $63, %rcx +; SSE2-NEXT: xorq %rax, %rcx +; SSE2-NEXT: movq %rcx, %xmm2 ; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] ; SSE2-NEXT: movapd %xmm0, %xmm4 -; SSE2-NEXT: subsd %xmm3, %xmm4 -; SSE2-NEXT: cvttsd2si %xmm4, %rcx -; SSE2-NEXT: xorq %rax, %rcx -; SSE2-NEXT: cvttsd2si %xmm0, %rax +; SSE2-NEXT: cmpltsd %xmm3, %xmm4 +; SSE2-NEXT: movapd %xmm4, %xmm5 +; SSE2-NEXT: andpd %xmm0, %xmm4 +; SSE2-NEXT: xorl %eax, %eax ; SSE2-NEXT: ucomisd %xmm3, %xmm0 -; SSE2-NEXT: cmovaeq %rcx, %rax +; SSE2-NEXT: subsd %xmm3, %xmm0 +; SSE2-NEXT: andnpd %xmm0, %xmm5 +; SSE2-NEXT: orpd %xmm5, %xmm4 +; SSE2-NEXT: cvttsd2si %xmm4, %rcx +; SSE2-NEXT: setae %al +; SSE2-NEXT: shlq $63, %rax +; SSE2-NEXT: xorq %rcx, %rax ; SSE2-NEXT: movq %rax, %xmm0 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0] ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [4294967295,4294967295] Index: test/CodeGen/X86/half.ll =================================================================== --- test/CodeGen/X86/half.ll +++ test/CodeGen/X86/half.ll @@ -273,13 +273,18 @@ ; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee ; CHECK-LIBCALL-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; CHECK-LIBCALL-NEXT: movaps %xmm0, %xmm2 -; CHECK-LIBCALL-NEXT: subss %xmm1, %xmm2 -; CHECK-LIBCALL-NEXT: cvttss2si %xmm2, %rax -; CHECK-LIBCALL-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; CHECK-LIBCALL-NEXT: xorq %rax, %rcx -; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rax +; CHECK-LIBCALL-NEXT: cmpltss %xmm1, %xmm2 +; CHECK-LIBCALL-NEXT: movaps %xmm2, %xmm3 +; CHECK-LIBCALL-NEXT: andps %xmm0, %xmm2 +; CHECK-LIBCALL-NEXT: xorl %eax, %eax ; CHECK-LIBCALL-NEXT: ucomiss %xmm1, %xmm0 -; CHECK-LIBCALL-NEXT: cmovaeq %rcx, %rax +; CHECK-LIBCALL-NEXT: subss %xmm1, %xmm0 +; CHECK-LIBCALL-NEXT: andnps %xmm0, %xmm3 +; CHECK-LIBCALL-NEXT: orps %xmm3, %xmm2 +; CHECK-LIBCALL-NEXT: cvttss2si %xmm2, %rcx +; CHECK-LIBCALL-NEXT: setae %al +; CHECK-LIBCALL-NEXT: shlq $63, %rax +; CHECK-LIBCALL-NEXT: xorq %rcx, %rax ; CHECK-LIBCALL-NEXT: popq %rcx ; CHECK-LIBCALL-NEXT: retq ; @@ -289,13 +294,15 @@ ; BWON-F16C-NEXT: vmovd %eax, %xmm0 ; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 ; BWON-F16C-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; BWON-F16C-NEXT: vsubss %xmm1, %xmm0, %xmm2 -; BWON-F16C-NEXT: vcvttss2si %xmm2, %rax -; BWON-F16C-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; BWON-F16C-NEXT: xorq %rax, %rcx -; BWON-F16C-NEXT: vcvttss2si %xmm0, %rax +; BWON-F16C-NEXT: vcmpltss %xmm1, %xmm0, %xmm2 +; BWON-F16C-NEXT: vsubss %xmm1, %xmm0, %xmm3 +; BWON-F16C-NEXT: vblendvps %xmm2, %xmm0, %xmm3, %xmm2 +; BWON-F16C-NEXT: vcvttss2si %xmm2, %rcx +; BWON-F16C-NEXT: xorl %eax, %eax ; BWON-F16C-NEXT: vucomiss %xmm1, %xmm0 -; BWON-F16C-NEXT: cmovaeq %rcx, %rax +; BWON-F16C-NEXT: setae %al +; BWON-F16C-NEXT: shlq $63, %rax +; BWON-F16C-NEXT: xorq %rcx, %rax ; BWON-F16C-NEXT: retq ; ; CHECK-I686-LABEL: test_fptoui_i64: Index: test/CodeGen/X86/scalar-fp-to-i64.ll =================================================================== --- test/CodeGen/X86/scalar-fp-to-i64.ll +++ test/CodeGen/X86/scalar-fp-to-i64.ll @@ -180,13 +180,18 @@ ; SSE3_64: # %bb.0: ; SSE3_64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE3_64-NEXT: movaps %xmm0, %xmm2 -; SSE3_64-NEXT: subss %xmm1, %xmm2 -; SSE3_64-NEXT: cvttss2si %xmm2, %rax -; SSE3_64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; SSE3_64-NEXT: xorq %rax, %rcx -; SSE3_64-NEXT: cvttss2si %xmm0, %rax +; SSE3_64-NEXT: cmpltss %xmm1, %xmm2 +; SSE3_64-NEXT: movaps %xmm2, %xmm3 +; SSE3_64-NEXT: andps %xmm0, %xmm2 +; SSE3_64-NEXT: xorl %eax, %eax ; SSE3_64-NEXT: ucomiss %xmm1, %xmm0 -; SSE3_64-NEXT: cmovaeq %rcx, %rax +; SSE3_64-NEXT: subss %xmm1, %xmm0 +; SSE3_64-NEXT: andnps %xmm0, %xmm3 +; SSE3_64-NEXT: orps %xmm3, %xmm2 +; SSE3_64-NEXT: cvttss2si %xmm2, %rcx +; SSE3_64-NEXT: setae %al +; SSE3_64-NEXT: shlq $63, %rax +; SSE3_64-NEXT: xorq %rcx, %rax ; SSE3_64-NEXT: retq ; ; SSE2_32_WIN-LABEL: f_to_u64: @@ -257,13 +262,18 @@ ; SSE2_64: # %bb.0: ; SSE2_64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE2_64-NEXT: movaps %xmm0, %xmm2 -; SSE2_64-NEXT: subss %xmm1, %xmm2 -; SSE2_64-NEXT: cvttss2si %xmm2, %rax -; SSE2_64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; SSE2_64-NEXT: xorq %rax, %rcx -; SSE2_64-NEXT: cvttss2si %xmm0, %rax +; SSE2_64-NEXT: cmpltss %xmm1, %xmm2 +; SSE2_64-NEXT: movaps %xmm2, %xmm3 +; SSE2_64-NEXT: andps %xmm0, %xmm2 +; SSE2_64-NEXT: xorl %eax, %eax ; SSE2_64-NEXT: ucomiss %xmm1, %xmm0 -; SSE2_64-NEXT: cmovaeq %rcx, %rax +; SSE2_64-NEXT: subss %xmm1, %xmm0 +; SSE2_64-NEXT: andnps %xmm0, %xmm3 +; SSE2_64-NEXT: orps %xmm3, %xmm2 +; SSE2_64-NEXT: cvttss2si %xmm2, %rcx +; SSE2_64-NEXT: setae %al +; SSE2_64-NEXT: shlq $63, %rax +; SSE2_64-NEXT: xorq %rcx, %rax ; SSE2_64-NEXT: retq ; ; X87_WIN-LABEL: f_to_u64: @@ -668,13 +678,18 @@ ; SSE3_64: # %bb.0: ; SSE3_64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; SSE3_64-NEXT: movapd %xmm0, %xmm2 -; SSE3_64-NEXT: subsd %xmm1, %xmm2 -; SSE3_64-NEXT: cvttsd2si %xmm2, %rax -; SSE3_64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; SSE3_64-NEXT: xorq %rax, %rcx -; SSE3_64-NEXT: cvttsd2si %xmm0, %rax +; SSE3_64-NEXT: cmpltsd %xmm1, %xmm2 +; SSE3_64-NEXT: movapd %xmm2, %xmm3 +; SSE3_64-NEXT: andpd %xmm0, %xmm2 +; SSE3_64-NEXT: xorl %eax, %eax ; SSE3_64-NEXT: ucomisd %xmm1, %xmm0 -; SSE3_64-NEXT: cmovaeq %rcx, %rax +; SSE3_64-NEXT: subsd %xmm1, %xmm0 +; SSE3_64-NEXT: andnpd %xmm0, %xmm3 +; SSE3_64-NEXT: orpd %xmm3, %xmm2 +; SSE3_64-NEXT: cvttsd2si %xmm2, %rcx +; SSE3_64-NEXT: setae %al +; SSE3_64-NEXT: shlq $63, %rax +; SSE3_64-NEXT: xorq %rcx, %rax ; SSE3_64-NEXT: retq ; ; SSE2_32_WIN-LABEL: d_to_u64: @@ -745,13 +760,18 @@ ; SSE2_64: # %bb.0: ; SSE2_64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; SSE2_64-NEXT: movapd %xmm0, %xmm2 -; SSE2_64-NEXT: subsd %xmm1, %xmm2 -; SSE2_64-NEXT: cvttsd2si %xmm2, %rax -; SSE2_64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; SSE2_64-NEXT: xorq %rax, %rcx -; SSE2_64-NEXT: cvttsd2si %xmm0, %rax +; SSE2_64-NEXT: cmpltsd %xmm1, %xmm2 +; SSE2_64-NEXT: movapd %xmm2, %xmm3 +; SSE2_64-NEXT: andpd %xmm0, %xmm2 +; SSE2_64-NEXT: xorl %eax, %eax ; SSE2_64-NEXT: ucomisd %xmm1, %xmm0 -; SSE2_64-NEXT: cmovaeq %rcx, %rax +; SSE2_64-NEXT: subsd %xmm1, %xmm0 +; SSE2_64-NEXT: andnpd %xmm0, %xmm3 +; SSE2_64-NEXT: orpd %xmm3, %xmm2 +; SSE2_64-NEXT: cvttsd2si %xmm2, %rcx +; SSE2_64-NEXT: setae %al +; SSE2_64-NEXT: shlq $63, %rax +; SSE2_64-NEXT: xorq %rcx, %rax ; SSE2_64-NEXT: retq ; ; X87_WIN-LABEL: d_to_u64: @@ -1147,25 +1167,21 @@ ; ; SSE3_64_WIN-LABEL: x_to_u64: ; SSE3_64_WIN: # %bb.0: -; SSE3_64_WIN-NEXT: subq $16, %rsp +; SSE3_64_WIN-NEXT: pushq %rax ; SSE3_64_WIN-NEXT: fldt (%rcx) ; SSE3_64_WIN-NEXT: flds __real@{{.*}}(%rip) ; SSE3_64_WIN-NEXT: fld %st(1) ; SSE3_64_WIN-NEXT: fsub %st(1) -; SSE3_64_WIN-NEXT: fisttpll {{[0-9]+}}(%rsp) -; SSE3_64_WIN-NEXT: fld %st(1) +; SSE3_64_WIN-NEXT: xorl %eax, %eax +; SSE3_64_WIN-NEXT: fxch %st(1) +; SSE3_64_WIN-NEXT: fucompi %st(2) +; SSE3_64_WIN-NEXT: fcmovnbe %st(1), %st(0) +; SSE3_64_WIN-NEXT: fstp %st(1) ; SSE3_64_WIN-NEXT: fisttpll (%rsp) -; SSE3_64_WIN-NEXT: fucompi %st(1) -; SSE3_64_WIN-NEXT: fstp %st(0) -; SSE3_64_WIN-NEXT: jbe .LBB4_1 -; SSE3_64_WIN-NEXT: # %bb.2: -; SSE3_64_WIN-NEXT: movq (%rsp), %rax -; SSE3_64_WIN-NEXT: addq $16, %rsp -; SSE3_64_WIN-NEXT: retq -; SSE3_64_WIN-NEXT: .LBB4_1: -; SSE3_64_WIN-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; SSE3_64_WIN-NEXT: xorq {{[0-9]+}}(%rsp), %rax -; SSE3_64_WIN-NEXT: addq $16, %rsp +; SSE3_64_WIN-NEXT: setbe %al +; SSE3_64_WIN-NEXT: shlq $63, %rax +; SSE3_64_WIN-NEXT: xorq (%rsp), %rax +; SSE3_64_WIN-NEXT: popq %rcx ; SSE3_64_WIN-NEXT: retq ; ; SSE3_64_LIN-LABEL: x_to_u64: @@ -1174,17 +1190,14 @@ ; SSE3_64_LIN-NEXT: flds {{.*}}(%rip) ; SSE3_64_LIN-NEXT: fld %st(1) ; SSE3_64_LIN-NEXT: fsub %st(1) +; SSE3_64_LIN-NEXT: xorl %eax, %eax +; SSE3_64_LIN-NEXT: fxch %st(1) +; SSE3_64_LIN-NEXT: fucompi %st(2) +; SSE3_64_LIN-NEXT: fcmovnbe %st(1), %st(0) +; SSE3_64_LIN-NEXT: fstp %st(1) ; SSE3_64_LIN-NEXT: fisttpll -{{[0-9]+}}(%rsp) -; SSE3_64_LIN-NEXT: fld %st(1) -; SSE3_64_LIN-NEXT: fisttpll -{{[0-9]+}}(%rsp) -; SSE3_64_LIN-NEXT: fucompi %st(1) -; SSE3_64_LIN-NEXT: fstp %st(0) -; SSE3_64_LIN-NEXT: jbe .LBB4_1 -; SSE3_64_LIN-NEXT: # %bb.2: -; SSE3_64_LIN-NEXT: movq -{{[0-9]+}}(%rsp), %rax -; SSE3_64_LIN-NEXT: retq -; SSE3_64_LIN-NEXT: .LBB4_1: -; SSE3_64_LIN-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; SSE3_64_LIN-NEXT: setbe %al +; SSE3_64_LIN-NEXT: shlq $63, %rax ; SSE3_64_LIN-NEXT: xorq -{{[0-9]+}}(%rsp), %rax ; SSE3_64_LIN-NEXT: retq ; @@ -1246,37 +1259,27 @@ ; ; SSE2_64_WIN-LABEL: x_to_u64: ; SSE2_64_WIN: # %bb.0: -; SSE2_64_WIN-NEXT: subq $24, %rsp +; SSE2_64_WIN-NEXT: subq $16, %rsp ; SSE2_64_WIN-NEXT: fldt (%rcx) ; SSE2_64_WIN-NEXT: flds __real@{{.*}}(%rip) ; SSE2_64_WIN-NEXT: fld %st(1) ; SSE2_64_WIN-NEXT: fsub %st(1) +; SSE2_64_WIN-NEXT: xorl %eax, %eax +; SSE2_64_WIN-NEXT: fxch %st(1) +; SSE2_64_WIN-NEXT: fucompi %st(2) +; SSE2_64_WIN-NEXT: fcmovnbe %st(1), %st(0) +; SSE2_64_WIN-NEXT: fstp %st(1) ; SSE2_64_WIN-NEXT: fnstcw {{[0-9]+}}(%rsp) -; SSE2_64_WIN-NEXT: movzwl {{[0-9]+}}(%rsp), %eax +; SSE2_64_WIN-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx ; SSE2_64_WIN-NEXT: movw $3199, {{[0-9]+}}(%rsp) # imm = 0xC7F ; SSE2_64_WIN-NEXT: fldcw {{[0-9]+}}(%rsp) -; SSE2_64_WIN-NEXT: movw %ax, {{[0-9]+}}(%rsp) +; SSE2_64_WIN-NEXT: movw %cx, {{[0-9]+}}(%rsp) ; SSE2_64_WIN-NEXT: fistpll {{[0-9]+}}(%rsp) ; SSE2_64_WIN-NEXT: fldcw {{[0-9]+}}(%rsp) -; SSE2_64_WIN-NEXT: fnstcw {{[0-9]+}}(%rsp) -; SSE2_64_WIN-NEXT: movzwl {{[0-9]+}}(%rsp), %eax -; SSE2_64_WIN-NEXT: movw $3199, {{[0-9]+}}(%rsp) # imm = 0xC7F -; SSE2_64_WIN-NEXT: fldcw {{[0-9]+}}(%rsp) -; SSE2_64_WIN-NEXT: movw %ax, {{[0-9]+}}(%rsp) -; SSE2_64_WIN-NEXT: fld %st(1) -; SSE2_64_WIN-NEXT: fistpll {{[0-9]+}}(%rsp) -; SSE2_64_WIN-NEXT: fldcw {{[0-9]+}}(%rsp) -; SSE2_64_WIN-NEXT: fucompi %st(1) -; SSE2_64_WIN-NEXT: fstp %st(0) -; SSE2_64_WIN-NEXT: jbe .LBB4_1 -; SSE2_64_WIN-NEXT: # %bb.2: -; SSE2_64_WIN-NEXT: movq {{[0-9]+}}(%rsp), %rax -; SSE2_64_WIN-NEXT: addq $24, %rsp -; SSE2_64_WIN-NEXT: retq -; SSE2_64_WIN-NEXT: .LBB4_1: -; SSE2_64_WIN-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; SSE2_64_WIN-NEXT: setbe %al +; SSE2_64_WIN-NEXT: shlq $63, %rax ; SSE2_64_WIN-NEXT: xorq {{[0-9]+}}(%rsp), %rax -; SSE2_64_WIN-NEXT: addq $24, %rsp +; SSE2_64_WIN-NEXT: addq $16, %rsp ; SSE2_64_WIN-NEXT: retq ; ; SSE2_64_LIN-LABEL: x_to_u64: @@ -1285,29 +1288,20 @@ ; SSE2_64_LIN-NEXT: flds {{.*}}(%rip) ; SSE2_64_LIN-NEXT: fld %st(1) ; SSE2_64_LIN-NEXT: fsub %st(1) +; SSE2_64_LIN-NEXT: xorl %eax, %eax +; SSE2_64_LIN-NEXT: fxch %st(1) +; SSE2_64_LIN-NEXT: fucompi %st(2) +; SSE2_64_LIN-NEXT: fcmovnbe %st(1), %st(0) +; SSE2_64_LIN-NEXT: fstp %st(1) ; SSE2_64_LIN-NEXT: fnstcw -{{[0-9]+}}(%rsp) -; SSE2_64_LIN-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax +; SSE2_64_LIN-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx ; SSE2_64_LIN-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F ; SSE2_64_LIN-NEXT: fldcw -{{[0-9]+}}(%rsp) -; SSE2_64_LIN-NEXT: movw %ax, -{{[0-9]+}}(%rsp) +; SSE2_64_LIN-NEXT: movw %cx, -{{[0-9]+}}(%rsp) ; SSE2_64_LIN-NEXT: fistpll -{{[0-9]+}}(%rsp) ; SSE2_64_LIN-NEXT: fldcw -{{[0-9]+}}(%rsp) -; SSE2_64_LIN-NEXT: fnstcw -{{[0-9]+}}(%rsp) -; SSE2_64_LIN-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax -; SSE2_64_LIN-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F -; SSE2_64_LIN-NEXT: fldcw -{{[0-9]+}}(%rsp) -; SSE2_64_LIN-NEXT: movw %ax, -{{[0-9]+}}(%rsp) -; SSE2_64_LIN-NEXT: fld %st(1) -; SSE2_64_LIN-NEXT: fistpll -{{[0-9]+}}(%rsp) -; SSE2_64_LIN-NEXT: fldcw -{{[0-9]+}}(%rsp) -; SSE2_64_LIN-NEXT: fucompi %st(1) -; SSE2_64_LIN-NEXT: fstp %st(0) -; SSE2_64_LIN-NEXT: jbe .LBB4_1 -; SSE2_64_LIN-NEXT: # %bb.2: -; SSE2_64_LIN-NEXT: movq -{{[0-9]+}}(%rsp), %rax -; SSE2_64_LIN-NEXT: retq -; SSE2_64_LIN-NEXT: .LBB4_1: -; SSE2_64_LIN-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 +; SSE2_64_LIN-NEXT: setbe %al +; SSE2_64_LIN-NEXT: shlq $63, %rax ; SSE2_64_LIN-NEXT: xorq -{{[0-9]+}}(%rsp), %rax ; SSE2_64_LIN-NEXT: retq ; Index: test/CodeGen/X86/vec_fp_to_int.ll =================================================================== --- test/CodeGen/X86/vec_fp_to_int.ll +++ test/CodeGen/X86/vec_fp_to_int.ll @@ -320,22 +320,34 @@ ; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero ; SSE-NEXT: movapd %xmm0, %xmm1 ; SSE-NEXT: subsd %xmm2, %xmm1 -; SSE-NEXT: cvttsd2si %xmm1, %rax -; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; SSE-NEXT: xorq %rcx, %rax -; SSE-NEXT: cvttsd2si %xmm0, %rdx +; SSE-NEXT: movapd %xmm0, %xmm3 +; SSE-NEXT: cmpltsd %xmm2, %xmm3 +; SSE-NEXT: movapd %xmm3, %xmm4 +; SSE-NEXT: andnpd %xmm1, %xmm4 +; SSE-NEXT: andpd %xmm0, %xmm3 +; SSE-NEXT: orpd %xmm4, %xmm3 +; SSE-NEXT: cvttsd2si %xmm3, %rax +; SSE-NEXT: xorl %ecx, %ecx ; SSE-NEXT: ucomisd %xmm2, %xmm0 -; SSE-NEXT: cmovaeq %rax, %rdx -; SSE-NEXT: movq %rdx, %xmm1 +; SSE-NEXT: setae %cl +; SSE-NEXT: shlq $63, %rcx +; SSE-NEXT: xorq %rax, %rcx +; SSE-NEXT: movq %rcx, %xmm1 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] ; SSE-NEXT: movapd %xmm0, %xmm3 -; SSE-NEXT: subsd %xmm2, %xmm3 -; SSE-NEXT: cvttsd2si %xmm3, %rax -; SSE-NEXT: xorq %rcx, %rax -; SSE-NEXT: cvttsd2si %xmm0, %rcx +; SSE-NEXT: cmpltsd %xmm2, %xmm3 +; SSE-NEXT: movapd %xmm3, %xmm4 +; SSE-NEXT: andpd %xmm0, %xmm3 +; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: ucomisd %xmm2, %xmm0 -; SSE-NEXT: cmovaeq %rax, %rcx -; SSE-NEXT: movq %rcx, %xmm0 +; SSE-NEXT: subsd %xmm2, %xmm0 +; SSE-NEXT: andnpd %xmm0, %xmm4 +; SSE-NEXT: orpd %xmm4, %xmm3 +; SSE-NEXT: cvttsd2si %xmm3, %rcx +; SSE-NEXT: setae %al +; SSE-NEXT: shlq $63, %rax +; SSE-NEXT: xorq %rcx, %rax +; SSE-NEXT: movq %rax, %xmm0 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; SSE-NEXT: movdqa %xmm1, %xmm0 ; SSE-NEXT: retq @@ -343,21 +355,26 @@ ; VEX-LABEL: fptoui_2f64_to_2i64: ; VEX: # %bb.0: ; VEX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm2 +; VEX-NEXT: vcmpltsd %xmm1, %xmm0, %xmm2 +; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm3 +; VEX-NEXT: vblendvpd %xmm2, %xmm0, %xmm3, %xmm2 ; VEX-NEXT: vcvttsd2si %xmm2, %rax -; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; VEX-NEXT: xorq %rcx, %rax -; VEX-NEXT: vcvttsd2si %xmm0, %rdx +; VEX-NEXT: xorl %ecx, %ecx ; VEX-NEXT: vucomisd %xmm1, %xmm0 -; VEX-NEXT: cmovaeq %rax, %rdx -; VEX-NEXT: vmovq %rdx, %xmm2 +; VEX-NEXT: setae %cl +; VEX-NEXT: shlq $63, %rcx +; VEX-NEXT: xorq %rax, %rcx +; VEX-NEXT: vmovq %rcx, %xmm2 ; VEX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm3 +; VEX-NEXT: vcmpltsd %xmm1, %xmm0, %xmm3 +; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm4 +; VEX-NEXT: vblendvpd %xmm3, %xmm0, %xmm4, %xmm3 ; VEX-NEXT: vcvttsd2si %xmm3, %rax -; VEX-NEXT: xorq %rcx, %rax -; VEX-NEXT: vcvttsd2si %xmm0, %rcx +; VEX-NEXT: xorl %ecx, %ecx ; VEX-NEXT: vucomisd %xmm1, %xmm0 -; VEX-NEXT: cmovaeq %rax, %rcx +; VEX-NEXT: setae %cl +; VEX-NEXT: shlq $63, %rcx +; VEX-NEXT: xorq %rax, %rcx ; VEX-NEXT: vmovq %rcx, %xmm0 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] ; VEX-NEXT: retq @@ -419,22 +436,34 @@ ; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero ; SSE-NEXT: movapd %xmm0, %xmm1 ; SSE-NEXT: subsd %xmm2, %xmm1 -; SSE-NEXT: cvttsd2si %xmm1, %rax -; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; SSE-NEXT: xorq %rcx, %rax -; SSE-NEXT: cvttsd2si %xmm0, %rdx +; SSE-NEXT: movapd %xmm0, %xmm3 +; SSE-NEXT: cmpltsd %xmm2, %xmm3 +; SSE-NEXT: movapd %xmm3, %xmm4 +; SSE-NEXT: andnpd %xmm1, %xmm4 +; SSE-NEXT: andpd %xmm0, %xmm3 +; SSE-NEXT: orpd %xmm4, %xmm3 +; SSE-NEXT: cvttsd2si %xmm3, %rax +; SSE-NEXT: xorl %ecx, %ecx ; SSE-NEXT: ucomisd %xmm2, %xmm0 -; SSE-NEXT: cmovaeq %rax, %rdx -; SSE-NEXT: movq %rdx, %xmm1 +; SSE-NEXT: setae %cl +; SSE-NEXT: shlq $63, %rcx +; SSE-NEXT: xorq %rax, %rcx +; SSE-NEXT: movq %rcx, %xmm1 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] ; SSE-NEXT: movapd %xmm0, %xmm3 -; SSE-NEXT: subsd %xmm2, %xmm3 -; SSE-NEXT: cvttsd2si %xmm3, %rax -; SSE-NEXT: xorq %rcx, %rax -; SSE-NEXT: cvttsd2si %xmm0, %rcx +; SSE-NEXT: cmpltsd %xmm2, %xmm3 +; SSE-NEXT: movapd %xmm3, %xmm4 +; SSE-NEXT: andpd %xmm0, %xmm3 +; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: ucomisd %xmm2, %xmm0 -; SSE-NEXT: cmovaeq %rax, %rcx -; SSE-NEXT: movq %rcx, %xmm0 +; SSE-NEXT: subsd %xmm2, %xmm0 +; SSE-NEXT: andnpd %xmm0, %xmm4 +; SSE-NEXT: orpd %xmm4, %xmm3 +; SSE-NEXT: cvttsd2si %xmm3, %rcx +; SSE-NEXT: setae %al +; SSE-NEXT: shlq $63, %rax +; SSE-NEXT: xorq %rcx, %rax +; SSE-NEXT: movq %rax, %xmm0 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; SSE-NEXT: pxor %xmm0, %xmm0 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,3] @@ -444,21 +473,26 @@ ; VEX-LABEL: fptoui_2f64_to_4i32: ; VEX: # %bb.0: ; VEX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm2 +; VEX-NEXT: vcmpltsd %xmm1, %xmm0, %xmm2 +; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm3 +; VEX-NEXT: vblendvpd %xmm2, %xmm0, %xmm3, %xmm2 ; VEX-NEXT: vcvttsd2si %xmm2, %rax -; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; VEX-NEXT: xorq %rcx, %rax -; VEX-NEXT: vcvttsd2si %xmm0, %rdx +; VEX-NEXT: xorl %ecx, %ecx ; VEX-NEXT: vucomisd %xmm1, %xmm0 -; VEX-NEXT: cmovaeq %rax, %rdx -; VEX-NEXT: vmovq %rdx, %xmm2 +; VEX-NEXT: setae %cl +; VEX-NEXT: shlq $63, %rcx +; VEX-NEXT: xorq %rax, %rcx +; VEX-NEXT: vmovq %rcx, %xmm2 ; VEX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm3 +; VEX-NEXT: vcmpltsd %xmm1, %xmm0, %xmm3 +; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm4 +; VEX-NEXT: vblendvpd %xmm3, %xmm0, %xmm4, %xmm3 ; VEX-NEXT: vcvttsd2si %xmm3, %rax -; VEX-NEXT: xorq %rcx, %rax -; VEX-NEXT: vcvttsd2si %xmm0, %rcx +; VEX-NEXT: xorl %ecx, %ecx ; VEX-NEXT: vucomisd %xmm1, %xmm0 -; VEX-NEXT: cmovaeq %rax, %rcx +; VEX-NEXT: setae %cl +; VEX-NEXT: shlq $63, %rcx +; VEX-NEXT: xorq %rax, %rcx ; VEX-NEXT: vmovq %rcx, %xmm0 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] ; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero @@ -513,22 +547,34 @@ ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; SSE-NEXT: movapd %xmm0, %xmm2 ; SSE-NEXT: subsd %xmm1, %xmm2 -; SSE-NEXT: cvttsd2si %xmm2, %rax -; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; SSE-NEXT: xorq %rcx, %rax -; SSE-NEXT: cvttsd2si %xmm0, %rdx +; SSE-NEXT: movapd %xmm0, %xmm3 +; SSE-NEXT: cmpltsd %xmm1, %xmm3 +; SSE-NEXT: movapd %xmm3, %xmm4 +; SSE-NEXT: andnpd %xmm2, %xmm4 +; SSE-NEXT: andpd %xmm0, %xmm3 +; SSE-NEXT: orpd %xmm4, %xmm3 +; SSE-NEXT: cvttsd2si %xmm3, %rax +; SSE-NEXT: xorl %ecx, %ecx ; SSE-NEXT: ucomisd %xmm1, %xmm0 -; SSE-NEXT: cmovaeq %rax, %rdx -; SSE-NEXT: movq %rdx, %xmm2 +; SSE-NEXT: setae %cl +; SSE-NEXT: shlq $63, %rcx +; SSE-NEXT: xorq %rax, %rcx +; SSE-NEXT: movq %rcx, %xmm2 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] ; SSE-NEXT: movapd %xmm0, %xmm3 -; SSE-NEXT: subsd %xmm1, %xmm3 -; SSE-NEXT: cvttsd2si %xmm3, %rax -; SSE-NEXT: xorq %rcx, %rax -; SSE-NEXT: cvttsd2si %xmm0, %rcx +; SSE-NEXT: cmpltsd %xmm1, %xmm3 +; SSE-NEXT: movapd %xmm3, %xmm4 +; SSE-NEXT: andpd %xmm0, %xmm3 +; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: ucomisd %xmm1, %xmm0 -; SSE-NEXT: cmovaeq %rax, %rcx -; SSE-NEXT: movq %rcx, %xmm0 +; SSE-NEXT: subsd %xmm1, %xmm0 +; SSE-NEXT: andnpd %xmm0, %xmm4 +; SSE-NEXT: orpd %xmm4, %xmm3 +; SSE-NEXT: cvttsd2si %xmm3, %rcx +; SSE-NEXT: setae %al +; SSE-NEXT: shlq $63, %rax +; SSE-NEXT: xorq %rcx, %rax +; SSE-NEXT: movq %rax, %xmm0 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0] ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3] ; SSE-NEXT: retq @@ -536,21 +582,26 @@ ; VEX-LABEL: fptoui_2f64_to_2i32: ; VEX: # %bb.0: ; VEX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm2 +; VEX-NEXT: vcmpltsd %xmm1, %xmm0, %xmm2 +; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm3 +; VEX-NEXT: vblendvpd %xmm2, %xmm0, %xmm3, %xmm2 ; VEX-NEXT: vcvttsd2si %xmm2, %rax -; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; VEX-NEXT: xorq %rcx, %rax -; VEX-NEXT: vcvttsd2si %xmm0, %rdx +; VEX-NEXT: xorl %ecx, %ecx ; VEX-NEXT: vucomisd %xmm1, %xmm0 -; VEX-NEXT: cmovaeq %rax, %rdx -; VEX-NEXT: vmovq %rdx, %xmm2 +; VEX-NEXT: setae %cl +; VEX-NEXT: shlq $63, %rcx +; VEX-NEXT: xorq %rax, %rcx +; VEX-NEXT: vmovq %rcx, %xmm2 ; VEX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm3 +; VEX-NEXT: vcmpltsd %xmm1, %xmm0, %xmm3 +; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm4 +; VEX-NEXT: vblendvpd %xmm3, %xmm0, %xmm4, %xmm3 ; VEX-NEXT: vcvttsd2si %xmm3, %rax -; VEX-NEXT: xorq %rcx, %rax -; VEX-NEXT: vcvttsd2si %xmm0, %rcx +; VEX-NEXT: xorl %ecx, %ecx ; VEX-NEXT: vucomisd %xmm1, %xmm0 -; VEX-NEXT: cmovaeq %rax, %rcx +; VEX-NEXT: setae %cl +; VEX-NEXT: shlq $63, %rcx +; VEX-NEXT: xorq %rax, %rcx ; VEX-NEXT: vmovq %rcx, %xmm0 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] ; VEX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] @@ -605,22 +656,34 @@ ; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero ; SSE-NEXT: movapd %xmm0, %xmm1 ; SSE-NEXT: subsd %xmm2, %xmm1 -; SSE-NEXT: cvttsd2si %xmm1, %rax -; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; SSE-NEXT: xorq %rcx, %rax -; SSE-NEXT: cvttsd2si %xmm0, %rdx +; SSE-NEXT: movapd %xmm0, %xmm3 +; SSE-NEXT: cmpltsd %xmm2, %xmm3 +; SSE-NEXT: movapd %xmm3, %xmm4 +; SSE-NEXT: andnpd %xmm1, %xmm4 +; SSE-NEXT: andpd %xmm0, %xmm3 +; SSE-NEXT: orpd %xmm4, %xmm3 +; SSE-NEXT: cvttsd2si %xmm3, %rax +; SSE-NEXT: xorl %ecx, %ecx ; SSE-NEXT: ucomisd %xmm2, %xmm0 -; SSE-NEXT: cmovaeq %rax, %rdx -; SSE-NEXT: movq %rdx, %xmm1 +; SSE-NEXT: setae %cl +; SSE-NEXT: shlq $63, %rcx +; SSE-NEXT: xorq %rax, %rcx +; SSE-NEXT: movq %rcx, %xmm1 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] ; SSE-NEXT: movapd %xmm0, %xmm3 -; SSE-NEXT: subsd %xmm2, %xmm3 -; SSE-NEXT: cvttsd2si %xmm3, %rax -; SSE-NEXT: xorq %rcx, %rax -; SSE-NEXT: cvttsd2si %xmm0, %rcx +; SSE-NEXT: cmpltsd %xmm2, %xmm3 +; SSE-NEXT: movapd %xmm3, %xmm4 +; SSE-NEXT: andpd %xmm0, %xmm3 +; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: ucomisd %xmm2, %xmm0 -; SSE-NEXT: cmovaeq %rax, %rcx -; SSE-NEXT: movq %rcx, %xmm0 +; SSE-NEXT: subsd %xmm2, %xmm0 +; SSE-NEXT: andnpd %xmm0, %xmm4 +; SSE-NEXT: orpd %xmm4, %xmm3 +; SSE-NEXT: cvttsd2si %xmm3, %rcx +; SSE-NEXT: setae %al +; SSE-NEXT: shlq $63, %rax +; SSE-NEXT: xorq %rcx, %rax +; SSE-NEXT: movq %rax, %xmm0 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; SSE-NEXT: pxor %xmm0, %xmm0 ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,3] @@ -693,39 +756,64 @@ ; SSE-NEXT: movapd %xmm0, %xmm2 ; SSE-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero ; SSE-NEXT: subsd %xmm3, %xmm0 -; SSE-NEXT: cvttsd2si %xmm0, %rcx -; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; SSE-NEXT: xorq %rax, %rcx -; SSE-NEXT: cvttsd2si %xmm2, %rdx +; SSE-NEXT: movapd %xmm2, %xmm4 +; SSE-NEXT: cmpltsd %xmm3, %xmm4 +; SSE-NEXT: movapd %xmm4, %xmm5 +; SSE-NEXT: andnpd %xmm0, %xmm5 +; SSE-NEXT: andpd %xmm2, %xmm4 +; SSE-NEXT: orpd %xmm5, %xmm4 +; SSE-NEXT: cvttsd2si %xmm4, %rax +; SSE-NEXT: xorl %ecx, %ecx ; SSE-NEXT: ucomisd %xmm3, %xmm2 -; SSE-NEXT: cmovaeq %rcx, %rdx -; SSE-NEXT: movq %rdx, %xmm0 +; SSE-NEXT: setae %cl +; SSE-NEXT: shlq $63, %rcx +; SSE-NEXT: xorq %rax, %rcx +; SSE-NEXT: movq %rcx, %xmm0 ; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] ; SSE-NEXT: movapd %xmm2, %xmm4 -; SSE-NEXT: subsd %xmm3, %xmm4 -; SSE-NEXT: cvttsd2si %xmm4, %rcx -; SSE-NEXT: xorq %rax, %rcx -; SSE-NEXT: cvttsd2si %xmm2, %rdx +; SSE-NEXT: cmpltsd %xmm3, %xmm4 +; SSE-NEXT: movapd %xmm4, %xmm5 +; SSE-NEXT: andpd %xmm2, %xmm4 +; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: ucomisd %xmm3, %xmm2 -; SSE-NEXT: cmovaeq %rcx, %rdx -; SSE-NEXT: movq %rdx, %xmm2 +; SSE-NEXT: subsd %xmm3, %xmm2 +; SSE-NEXT: andnpd %xmm2, %xmm5 +; SSE-NEXT: orpd %xmm5, %xmm4 +; SSE-NEXT: cvttsd2si %xmm4, %rcx +; SSE-NEXT: setae %al +; SSE-NEXT: shlq $63, %rax +; SSE-NEXT: xorq %rcx, %rax +; SSE-NEXT: movq %rax, %xmm2 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSE-NEXT: movapd %xmm1, %xmm2 ; SSE-NEXT: subsd %xmm3, %xmm2 -; SSE-NEXT: cvttsd2si %xmm2, %rcx -; SSE-NEXT: xorq %rax, %rcx -; SSE-NEXT: cvttsd2si %xmm1, %rdx +; SSE-NEXT: movapd %xmm1, %xmm4 +; SSE-NEXT: cmpltsd %xmm3, %xmm4 +; SSE-NEXT: movapd %xmm4, %xmm5 +; SSE-NEXT: andnpd %xmm2, %xmm5 +; SSE-NEXT: andpd %xmm1, %xmm4 +; SSE-NEXT: orpd %xmm5, %xmm4 +; SSE-NEXT: cvttsd2si %xmm4, %rax +; SSE-NEXT: xorl %ecx, %ecx ; SSE-NEXT: ucomisd %xmm3, %xmm1 -; SSE-NEXT: cmovaeq %rcx, %rdx -; SSE-NEXT: movq %rdx, %xmm2 +; SSE-NEXT: setae %cl +; SSE-NEXT: shlq $63, %rcx +; SSE-NEXT: xorq %rax, %rcx +; SSE-NEXT: movq %rcx, %xmm2 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] ; SSE-NEXT: movapd %xmm1, %xmm4 -; SSE-NEXT: subsd %xmm3, %xmm4 -; SSE-NEXT: cvttsd2si %xmm4, %rcx -; SSE-NEXT: xorq %rax, %rcx -; SSE-NEXT: cvttsd2si %xmm1, %rax +; SSE-NEXT: cmpltsd %xmm3, %xmm4 +; SSE-NEXT: movapd %xmm4, %xmm5 +; SSE-NEXT: andpd %xmm1, %xmm4 +; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: ucomisd %xmm3, %xmm1 -; SSE-NEXT: cmovaeq %rcx, %rax +; SSE-NEXT: subsd %xmm3, %xmm1 +; SSE-NEXT: andnpd %xmm1, %xmm5 +; SSE-NEXT: orpd %xmm5, %xmm4 +; SSE-NEXT: cvttsd2si %xmm4, %rcx +; SSE-NEXT: setae %al +; SSE-NEXT: shlq $63, %rax +; SSE-NEXT: xorq %rcx, %rax ; SSE-NEXT: movq %rax, %xmm1 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0] ; SSE-NEXT: movdqa %xmm2, %xmm1 @@ -735,37 +823,48 @@ ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 ; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX1-NEXT: vsubsd %xmm1, %xmm2, %xmm3 +; AVX1-NEXT: vcmpltsd %xmm1, %xmm2, %xmm3 +; AVX1-NEXT: vsubsd %xmm1, %xmm2, %xmm4 +; AVX1-NEXT: vblendvpd %xmm3, %xmm2, %xmm4, %xmm3 ; AVX1-NEXT: vcvttsd2si %xmm3, %rax -; AVX1-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; AVX1-NEXT: xorq %rcx, %rax -; AVX1-NEXT: vcvttsd2si %xmm2, %rdx +; AVX1-NEXT: xorl %ecx, %ecx ; AVX1-NEXT: vucomisd %xmm1, %xmm2 -; AVX1-NEXT: cmovaeq %rax, %rdx -; AVX1-NEXT: vmovq %rdx, %xmm3 +; AVX1-NEXT: setae %cl +; AVX1-NEXT: shlq $63, %rcx +; AVX1-NEXT: xorq %rax, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm3 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; AVX1-NEXT: vsubsd %xmm1, %xmm2, %xmm4 +; AVX1-NEXT: vcmpltsd %xmm1, %xmm2, %xmm4 +; AVX1-NEXT: vsubsd %xmm1, %xmm2, %xmm5 +; AVX1-NEXT: vblendvpd %xmm4, %xmm2, %xmm5, %xmm4 ; AVX1-NEXT: vcvttsd2si %xmm4, %rax -; AVX1-NEXT: xorq %rcx, %rax -; AVX1-NEXT: vcvttsd2si %xmm2, %rdx +; AVX1-NEXT: xorl %ecx, %ecx ; AVX1-NEXT: vucomisd %xmm1, %xmm2 -; AVX1-NEXT: cmovaeq %rax, %rdx -; AVX1-NEXT: vmovq %rdx, %xmm2 +; AVX1-NEXT: setae %cl +; AVX1-NEXT: shlq $63, %rcx +; AVX1-NEXT: xorq %rax, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm2 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] -; AVX1-NEXT: vsubsd %xmm1, %xmm0, %xmm3 +; AVX1-NEXT: vcmpltsd %xmm1, %xmm0, %xmm3 +; AVX1-NEXT: vsubsd %xmm1, %xmm0, %xmm4 +; AVX1-NEXT: vblendvpd %xmm3, %xmm0, %xmm4, %xmm3 ; AVX1-NEXT: vcvttsd2si %xmm3, %rax -; AVX1-NEXT: xorq %rcx, %rax -; AVX1-NEXT: vcvttsd2si %xmm0, %rdx +; AVX1-NEXT: xorl %ecx, %ecx ; AVX1-NEXT: vucomisd %xmm1, %xmm0 -; AVX1-NEXT: cmovaeq %rax, %rdx -; AVX1-NEXT: vmovq %rdx, %xmm3 +; AVX1-NEXT: setae %cl +; AVX1-NEXT: shlq $63, %rcx +; AVX1-NEXT: xorq %rax, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm3 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX1-NEXT: vsubsd %xmm1, %xmm0, %xmm4 +; AVX1-NEXT: vcmpltsd %xmm1, %xmm0, %xmm4 +; AVX1-NEXT: vsubsd %xmm1, %xmm0, %xmm5 +; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm5, %xmm4 ; AVX1-NEXT: vcvttsd2si %xmm4, %rax -; AVX1-NEXT: xorq %rcx, %rax -; AVX1-NEXT: vcvttsd2si %xmm0, %rcx +; AVX1-NEXT: xorl %ecx, %ecx ; AVX1-NEXT: vucomisd %xmm1, %xmm0 -; AVX1-NEXT: cmovaeq %rax, %rcx +; AVX1-NEXT: setae %cl +; AVX1-NEXT: shlq $63, %rcx +; AVX1-NEXT: xorq %rax, %rcx ; AVX1-NEXT: vmovq %rcx, %xmm0 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0] ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -775,37 +874,48 @@ ; AVX2: # %bb.0: ; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm2 ; AVX2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX2-NEXT: vsubsd %xmm1, %xmm2, %xmm3 +; AVX2-NEXT: vcmpltsd %xmm1, %xmm2, %xmm3 +; AVX2-NEXT: vsubsd %xmm1, %xmm2, %xmm4 +; AVX2-NEXT: vblendvpd %xmm3, %xmm2, %xmm4, %xmm3 ; AVX2-NEXT: vcvttsd2si %xmm3, %rax -; AVX2-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; AVX2-NEXT: xorq %rcx, %rax -; AVX2-NEXT: vcvttsd2si %xmm2, %rdx +; AVX2-NEXT: xorl %ecx, %ecx ; AVX2-NEXT: vucomisd %xmm1, %xmm2 -; AVX2-NEXT: cmovaeq %rax, %rdx -; AVX2-NEXT: vmovq %rdx, %xmm3 +; AVX2-NEXT: setae %cl +; AVX2-NEXT: shlq $63, %rcx +; AVX2-NEXT: xorq %rax, %rcx +; AVX2-NEXT: vmovq %rcx, %xmm3 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; AVX2-NEXT: vsubsd %xmm1, %xmm2, %xmm4 +; AVX2-NEXT: vcmpltsd %xmm1, %xmm2, %xmm4 +; AVX2-NEXT: vsubsd %xmm1, %xmm2, %xmm5 +; AVX2-NEXT: vblendvpd %xmm4, %xmm2, %xmm5, %xmm4 ; AVX2-NEXT: vcvttsd2si %xmm4, %rax -; AVX2-NEXT: xorq %rcx, %rax -; AVX2-NEXT: vcvttsd2si %xmm2, %rdx +; AVX2-NEXT: xorl %ecx, %ecx ; AVX2-NEXT: vucomisd %xmm1, %xmm2 -; AVX2-NEXT: cmovaeq %rax, %rdx -; AVX2-NEXT: vmovq %rdx, %xmm2 +; AVX2-NEXT: setae %cl +; AVX2-NEXT: shlq $63, %rcx +; AVX2-NEXT: xorq %rax, %rcx +; AVX2-NEXT: vmovq %rcx, %xmm2 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] -; AVX2-NEXT: vsubsd %xmm1, %xmm0, %xmm3 +; AVX2-NEXT: vcmpltsd %xmm1, %xmm0, %xmm3 +; AVX2-NEXT: vsubsd %xmm1, %xmm0, %xmm4 +; AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm4, %xmm3 ; AVX2-NEXT: vcvttsd2si %xmm3, %rax -; AVX2-NEXT: xorq %rcx, %rax -; AVX2-NEXT: vcvttsd2si %xmm0, %rdx +; AVX2-NEXT: xorl %ecx, %ecx ; AVX2-NEXT: vucomisd %xmm1, %xmm0 -; AVX2-NEXT: cmovaeq %rax, %rdx -; AVX2-NEXT: vmovq %rdx, %xmm3 +; AVX2-NEXT: setae %cl +; AVX2-NEXT: shlq $63, %rcx +; AVX2-NEXT: xorq %rax, %rcx +; AVX2-NEXT: vmovq %rcx, %xmm3 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX2-NEXT: vsubsd %xmm1, %xmm0, %xmm4 +; AVX2-NEXT: vcmpltsd %xmm1, %xmm0, %xmm4 +; AVX2-NEXT: vsubsd %xmm1, %xmm0, %xmm5 +; AVX2-NEXT: vblendvpd %xmm4, %xmm0, %xmm5, %xmm4 ; AVX2-NEXT: vcvttsd2si %xmm4, %rax -; AVX2-NEXT: xorq %rcx, %rax -; AVX2-NEXT: vcvttsd2si %xmm0, %rcx +; AVX2-NEXT: xorl %ecx, %ecx ; AVX2-NEXT: vucomisd %xmm1, %xmm0 -; AVX2-NEXT: cmovaeq %rax, %rcx +; AVX2-NEXT: setae %cl +; AVX2-NEXT: shlq $63, %rcx +; AVX2-NEXT: xorq %rax, %rcx ; AVX2-NEXT: vmovq %rcx, %xmm0 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0] ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 @@ -891,39 +1001,64 @@ ; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero ; SSE-NEXT: movapd %xmm1, %xmm3 ; SSE-NEXT: subsd %xmm2, %xmm3 -; SSE-NEXT: cvttsd2si %xmm3, %rcx -; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; SSE-NEXT: xorq %rax, %rcx -; SSE-NEXT: cvttsd2si %xmm1, %rdx +; SSE-NEXT: movapd %xmm1, %xmm4 +; SSE-NEXT: cmpltsd %xmm2, %xmm4 +; SSE-NEXT: movapd %xmm4, %xmm5 +; SSE-NEXT: andnpd %xmm3, %xmm5 +; SSE-NEXT: andpd %xmm1, %xmm4 +; SSE-NEXT: orpd %xmm5, %xmm4 +; SSE-NEXT: cvttsd2si %xmm4, %rax +; SSE-NEXT: xorl %ecx, %ecx ; SSE-NEXT: ucomisd %xmm2, %xmm1 -; SSE-NEXT: cmovaeq %rcx, %rdx -; SSE-NEXT: movq %rdx, %xmm3 +; SSE-NEXT: setae %cl +; SSE-NEXT: shlq $63, %rcx +; SSE-NEXT: xorq %rax, %rcx +; SSE-NEXT: movq %rcx, %xmm3 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] ; SSE-NEXT: movapd %xmm1, %xmm4 -; SSE-NEXT: subsd %xmm2, %xmm4 -; SSE-NEXT: cvttsd2si %xmm4, %rcx -; SSE-NEXT: xorq %rax, %rcx -; SSE-NEXT: cvttsd2si %xmm1, %rdx +; SSE-NEXT: cmpltsd %xmm2, %xmm4 +; SSE-NEXT: movapd %xmm4, %xmm5 +; SSE-NEXT: andpd %xmm1, %xmm4 +; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: ucomisd %xmm2, %xmm1 -; SSE-NEXT: cmovaeq %rcx, %rdx -; SSE-NEXT: movq %rdx, %xmm1 +; SSE-NEXT: subsd %xmm2, %xmm1 +; SSE-NEXT: andnpd %xmm1, %xmm5 +; SSE-NEXT: orpd %xmm5, %xmm4 +; SSE-NEXT: cvttsd2si %xmm4, %rcx +; SSE-NEXT: setae %al +; SSE-NEXT: shlq $63, %rax +; SSE-NEXT: xorq %rcx, %rax +; SSE-NEXT: movq %rax, %xmm1 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm1[0] ; SSE-NEXT: movapd %xmm0, %xmm1 ; SSE-NEXT: subsd %xmm2, %xmm1 -; SSE-NEXT: cvttsd2si %xmm1, %rcx -; SSE-NEXT: xorq %rax, %rcx -; SSE-NEXT: cvttsd2si %xmm0, %rdx +; SSE-NEXT: movapd %xmm0, %xmm4 +; SSE-NEXT: cmpltsd %xmm2, %xmm4 +; SSE-NEXT: movapd %xmm4, %xmm5 +; SSE-NEXT: andnpd %xmm1, %xmm5 +; SSE-NEXT: andpd %xmm0, %xmm4 +; SSE-NEXT: orpd %xmm5, %xmm4 +; SSE-NEXT: cvttsd2si %xmm4, %rax +; SSE-NEXT: xorl %ecx, %ecx ; SSE-NEXT: ucomisd %xmm2, %xmm0 -; SSE-NEXT: cmovaeq %rcx, %rdx -; SSE-NEXT: movq %rdx, %xmm1 +; SSE-NEXT: setae %cl +; SSE-NEXT: shlq $63, %rcx +; SSE-NEXT: xorq %rax, %rcx +; SSE-NEXT: movq %rcx, %xmm1 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] ; SSE-NEXT: movapd %xmm0, %xmm4 -; SSE-NEXT: subsd %xmm2, %xmm4 -; SSE-NEXT: cvttsd2si %xmm4, %rcx -; SSE-NEXT: xorq %rax, %rcx -; SSE-NEXT: cvttsd2si %xmm0, %rax +; SSE-NEXT: cmpltsd %xmm2, %xmm4 +; SSE-NEXT: movapd %xmm4, %xmm5 +; SSE-NEXT: andpd %xmm0, %xmm4 +; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: ucomisd %xmm2, %xmm0 -; SSE-NEXT: cmovaeq %rcx, %rax +; SSE-NEXT: subsd %xmm2, %xmm0 +; SSE-NEXT: andnpd %xmm0, %xmm5 +; SSE-NEXT: orpd %xmm5, %xmm4 +; SSE-NEXT: cvttsd2si %xmm4, %rcx +; SSE-NEXT: setae %al +; SSE-NEXT: shlq $63, %rax +; SSE-NEXT: xorq %rcx, %rax ; SSE-NEXT: movq %rax, %xmm0 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm3[0,2] @@ -1481,22 +1616,34 @@ ; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; SSE-NEXT: movaps %xmm0, %xmm1 ; SSE-NEXT: subss %xmm2, %xmm1 -; SSE-NEXT: cvttss2si %xmm1, %rax -; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; SSE-NEXT: xorq %rcx, %rax -; SSE-NEXT: cvttss2si %xmm0, %rdx +; SSE-NEXT: movaps %xmm0, %xmm3 +; SSE-NEXT: cmpltss %xmm2, %xmm3 +; SSE-NEXT: movaps %xmm3, %xmm4 +; SSE-NEXT: andnps %xmm1, %xmm4 +; SSE-NEXT: andps %xmm0, %xmm3 +; SSE-NEXT: orps %xmm4, %xmm3 +; SSE-NEXT: cvttss2si %xmm3, %rax +; SSE-NEXT: xorl %ecx, %ecx ; SSE-NEXT: ucomiss %xmm2, %xmm0 -; SSE-NEXT: cmovaeq %rax, %rdx -; SSE-NEXT: movq %rdx, %xmm1 +; SSE-NEXT: setae %cl +; SSE-NEXT: shlq $63, %rcx +; SSE-NEXT: xorq %rax, %rcx +; SSE-NEXT: movq %rcx, %xmm1 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3] ; SSE-NEXT: movaps %xmm0, %xmm3 -; SSE-NEXT: subss %xmm2, %xmm3 -; SSE-NEXT: cvttss2si %xmm3, %rax -; SSE-NEXT: xorq %rcx, %rax -; SSE-NEXT: cvttss2si %xmm0, %rcx +; SSE-NEXT: cmpltss %xmm2, %xmm3 +; SSE-NEXT: movaps %xmm3, %xmm4 +; SSE-NEXT: andps %xmm0, %xmm3 +; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: ucomiss %xmm2, %xmm0 -; SSE-NEXT: cmovaeq %rax, %rcx -; SSE-NEXT: movq %rcx, %xmm0 +; SSE-NEXT: subss %xmm2, %xmm0 +; SSE-NEXT: andnps %xmm0, %xmm4 +; SSE-NEXT: orps %xmm4, %xmm3 +; SSE-NEXT: cvttss2si %xmm3, %rcx +; SSE-NEXT: setae %al +; SSE-NEXT: shlq $63, %rax +; SSE-NEXT: xorq %rcx, %rax +; SSE-NEXT: movq %rax, %xmm0 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; SSE-NEXT: movdqa %xmm1, %xmm0 ; SSE-NEXT: retq @@ -1504,21 +1651,26 @@ ; VEX-LABEL: fptoui_2f32_to_2i32: ; VEX: # %bb.0: ; VEX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm2 +; VEX-NEXT: vcmpltss %xmm1, %xmm0, %xmm2 +; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm3 +; VEX-NEXT: vblendvps %xmm2, %xmm0, %xmm3, %xmm2 ; VEX-NEXT: vcvttss2si %xmm2, %rax -; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; VEX-NEXT: xorq %rcx, %rax -; VEX-NEXT: vcvttss2si %xmm0, %rdx +; VEX-NEXT: xorl %ecx, %ecx ; VEX-NEXT: vucomiss %xmm1, %xmm0 -; VEX-NEXT: cmovaeq %rax, %rdx -; VEX-NEXT: vmovq %rdx, %xmm2 +; VEX-NEXT: setae %cl +; VEX-NEXT: shlq $63, %rcx +; VEX-NEXT: xorq %rax, %rcx +; VEX-NEXT: vmovq %rcx, %xmm2 ; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] -; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm3 +; VEX-NEXT: vcmpltss %xmm1, %xmm0, %xmm3 +; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm4 +; VEX-NEXT: vblendvps %xmm3, %xmm0, %xmm4, %xmm3 ; VEX-NEXT: vcvttss2si %xmm3, %rax -; VEX-NEXT: xorq %rcx, %rax -; VEX-NEXT: vcvttss2si %xmm0, %rcx +; VEX-NEXT: xorl %ecx, %ecx ; VEX-NEXT: vucomiss %xmm1, %xmm0 -; VEX-NEXT: cmovaeq %rax, %rcx +; VEX-NEXT: setae %cl +; VEX-NEXT: shlq $63, %rcx +; VEX-NEXT: xorq %rax, %rcx ; VEX-NEXT: vmovq %rcx, %xmm0 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] ; VEX-NEXT: retq @@ -1652,22 +1804,34 @@ ; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; SSE-NEXT: movaps %xmm0, %xmm1 ; SSE-NEXT: subss %xmm2, %xmm1 -; SSE-NEXT: cvttss2si %xmm1, %rax -; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; SSE-NEXT: xorq %rcx, %rax -; SSE-NEXT: cvttss2si %xmm0, %rdx +; SSE-NEXT: movaps %xmm0, %xmm3 +; SSE-NEXT: cmpltss %xmm2, %xmm3 +; SSE-NEXT: movaps %xmm3, %xmm4 +; SSE-NEXT: andnps %xmm1, %xmm4 +; SSE-NEXT: andps %xmm0, %xmm3 +; SSE-NEXT: orps %xmm4, %xmm3 +; SSE-NEXT: cvttss2si %xmm3, %rax +; SSE-NEXT: xorl %ecx, %ecx ; SSE-NEXT: ucomiss %xmm2, %xmm0 -; SSE-NEXT: cmovaeq %rax, %rdx -; SSE-NEXT: movq %rdx, %xmm1 +; SSE-NEXT: setae %cl +; SSE-NEXT: shlq $63, %rcx +; SSE-NEXT: xorq %rax, %rcx +; SSE-NEXT: movq %rcx, %xmm1 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3] ; SSE-NEXT: movaps %xmm0, %xmm3 -; SSE-NEXT: subss %xmm2, %xmm3 -; SSE-NEXT: cvttss2si %xmm3, %rax -; SSE-NEXT: xorq %rcx, %rax -; SSE-NEXT: cvttss2si %xmm0, %rcx +; SSE-NEXT: cmpltss %xmm2, %xmm3 +; SSE-NEXT: movaps %xmm3, %xmm4 +; SSE-NEXT: andps %xmm0, %xmm3 +; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: ucomiss %xmm2, %xmm0 -; SSE-NEXT: cmovaeq %rax, %rcx -; SSE-NEXT: movq %rcx, %xmm0 +; SSE-NEXT: subss %xmm2, %xmm0 +; SSE-NEXT: andnps %xmm0, %xmm4 +; SSE-NEXT: orps %xmm4, %xmm3 +; SSE-NEXT: cvttss2si %xmm3, %rcx +; SSE-NEXT: setae %al +; SSE-NEXT: shlq $63, %rax +; SSE-NEXT: xorq %rcx, %rax +; SSE-NEXT: movq %rax, %xmm0 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; SSE-NEXT: movdqa %xmm1, %xmm0 ; SSE-NEXT: retq @@ -1675,21 +1839,26 @@ ; VEX-LABEL: fptoui_2f32_to_2i64: ; VEX: # %bb.0: ; VEX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm2 +; VEX-NEXT: vcmpltss %xmm1, %xmm0, %xmm2 +; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm3 +; VEX-NEXT: vblendvps %xmm2, %xmm0, %xmm3, %xmm2 ; VEX-NEXT: vcvttss2si %xmm2, %rax -; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; VEX-NEXT: xorq %rcx, %rax -; VEX-NEXT: vcvttss2si %xmm0, %rdx +; VEX-NEXT: xorl %ecx, %ecx ; VEX-NEXT: vucomiss %xmm1, %xmm0 -; VEX-NEXT: cmovaeq %rax, %rdx -; VEX-NEXT: vmovq %rdx, %xmm2 +; VEX-NEXT: setae %cl +; VEX-NEXT: shlq $63, %rcx +; VEX-NEXT: xorq %rax, %rcx +; VEX-NEXT: vmovq %rcx, %xmm2 ; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] -; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm3 +; VEX-NEXT: vcmpltss %xmm1, %xmm0, %xmm3 +; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm4 +; VEX-NEXT: vblendvps %xmm3, %xmm0, %xmm4, %xmm3 ; VEX-NEXT: vcvttss2si %xmm3, %rax -; VEX-NEXT: xorq %rcx, %rax -; VEX-NEXT: vcvttss2si %xmm0, %rcx +; VEX-NEXT: xorl %ecx, %ecx ; VEX-NEXT: vucomiss %xmm1, %xmm0 -; VEX-NEXT: cmovaeq %rax, %rcx +; VEX-NEXT: setae %cl +; VEX-NEXT: shlq $63, %rcx +; VEX-NEXT: xorq %rax, %rcx ; VEX-NEXT: vmovq %rcx, %xmm0 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] ; VEX-NEXT: retq @@ -1752,22 +1921,34 @@ ; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; SSE-NEXT: movaps %xmm0, %xmm1 ; SSE-NEXT: subss %xmm2, %xmm1 -; SSE-NEXT: cvttss2si %xmm1, %rax -; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; SSE-NEXT: xorq %rcx, %rax -; SSE-NEXT: cvttss2si %xmm0, %rdx +; SSE-NEXT: movaps %xmm0, %xmm3 +; SSE-NEXT: cmpltss %xmm2, %xmm3 +; SSE-NEXT: movaps %xmm3, %xmm4 +; SSE-NEXT: andnps %xmm1, %xmm4 +; SSE-NEXT: andps %xmm0, %xmm3 +; SSE-NEXT: orps %xmm4, %xmm3 +; SSE-NEXT: cvttss2si %xmm3, %rax +; SSE-NEXT: xorl %ecx, %ecx ; SSE-NEXT: ucomiss %xmm2, %xmm0 -; SSE-NEXT: cmovaeq %rax, %rdx -; SSE-NEXT: movq %rdx, %xmm1 +; SSE-NEXT: setae %cl +; SSE-NEXT: shlq $63, %rcx +; SSE-NEXT: xorq %rax, %rcx +; SSE-NEXT: movq %rcx, %xmm1 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3] ; SSE-NEXT: movaps %xmm0, %xmm3 -; SSE-NEXT: subss %xmm2, %xmm3 -; SSE-NEXT: cvttss2si %xmm3, %rax -; SSE-NEXT: xorq %rcx, %rax -; SSE-NEXT: cvttss2si %xmm0, %rcx +; SSE-NEXT: cmpltss %xmm2, %xmm3 +; SSE-NEXT: movaps %xmm3, %xmm4 +; SSE-NEXT: andps %xmm0, %xmm3 +; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: ucomiss %xmm2, %xmm0 -; SSE-NEXT: cmovaeq %rax, %rcx -; SSE-NEXT: movq %rcx, %xmm0 +; SSE-NEXT: subss %xmm2, %xmm0 +; SSE-NEXT: andnps %xmm0, %xmm4 +; SSE-NEXT: orps %xmm4, %xmm3 +; SSE-NEXT: cvttss2si %xmm3, %rcx +; SSE-NEXT: setae %al +; SSE-NEXT: shlq $63, %rax +; SSE-NEXT: xorq %rcx, %rax +; SSE-NEXT: movq %rax, %xmm0 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; SSE-NEXT: movdqa %xmm1, %xmm0 ; SSE-NEXT: retq @@ -1776,21 +1957,26 @@ ; VEX: # %bb.0: ; VEX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; VEX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; VEX-NEXT: vsubss %xmm2, %xmm1, %xmm3 +; VEX-NEXT: vcmpltss %xmm2, %xmm1, %xmm3 +; VEX-NEXT: vsubss %xmm2, %xmm1, %xmm4 +; VEX-NEXT: vblendvps %xmm3, %xmm1, %xmm4, %xmm3 ; VEX-NEXT: vcvttss2si %xmm3, %rax -; VEX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; VEX-NEXT: xorq %rcx, %rax -; VEX-NEXT: vcvttss2si %xmm1, %rdx +; VEX-NEXT: xorl %ecx, %ecx ; VEX-NEXT: vucomiss %xmm2, %xmm1 -; VEX-NEXT: cmovaeq %rax, %rdx -; VEX-NEXT: vsubss %xmm2, %xmm0, %xmm1 +; VEX-NEXT: setae %cl +; VEX-NEXT: shlq $63, %rcx +; VEX-NEXT: xorq %rax, %rcx +; VEX-NEXT: vcmpltss %xmm2, %xmm0, %xmm1 +; VEX-NEXT: vsubss %xmm2, %xmm0, %xmm3 +; VEX-NEXT: vblendvps %xmm1, %xmm0, %xmm3, %xmm1 ; VEX-NEXT: vcvttss2si %xmm1, %rax -; VEX-NEXT: xorq %rcx, %rax -; VEX-NEXT: vcvttss2si %xmm0, %rcx +; VEX-NEXT: xorl %edx, %edx ; VEX-NEXT: vucomiss %xmm2, %xmm0 -; VEX-NEXT: cmovaeq %rax, %rcx -; VEX-NEXT: vmovq %rcx, %xmm0 -; VEX-NEXT: vmovq %rdx, %xmm1 +; VEX-NEXT: setae %dl +; VEX-NEXT: shlq $63, %rdx +; VEX-NEXT: xorq %rax, %rdx +; VEX-NEXT: vmovq %rdx, %xmm0 +; VEX-NEXT: vmovq %rcx, %xmm1 ; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; VEX-NEXT: retq ; @@ -1989,42 +2175,66 @@ ; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE-NEXT: movaps %xmm0, %xmm2 ; SSE-NEXT: subss %xmm1, %xmm2 -; SSE-NEXT: cvttss2si %xmm2, %rcx -; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; SSE-NEXT: xorq %rax, %rcx -; SSE-NEXT: cvttss2si %xmm0, %rdx +; SSE-NEXT: movaps %xmm0, %xmm3 +; SSE-NEXT: cmpltss %xmm1, %xmm3 +; SSE-NEXT: movaps %xmm3, %xmm4 +; SSE-NEXT: andnps %xmm2, %xmm4 +; SSE-NEXT: andps %xmm0, %xmm3 +; SSE-NEXT: orps %xmm4, %xmm3 +; SSE-NEXT: cvttss2si %xmm3, %rax +; SSE-NEXT: xorl %ecx, %ecx ; SSE-NEXT: ucomiss %xmm1, %xmm0 -; SSE-NEXT: cmovaeq %rcx, %rdx -; SSE-NEXT: movq %rdx, %xmm2 +; SSE-NEXT: setae %cl +; SSE-NEXT: shlq $63, %rcx +; SSE-NEXT: xorq %rax, %rcx +; SSE-NEXT: movq %rcx, %xmm2 ; SSE-NEXT: movaps %xmm0, %xmm3 ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3] ; SSE-NEXT: movaps %xmm3, %xmm4 -; SSE-NEXT: subss %xmm1, %xmm4 -; SSE-NEXT: cvttss2si %xmm4, %rcx -; SSE-NEXT: xorq %rax, %rcx -; SSE-NEXT: cvttss2si %xmm3, %rdx +; SSE-NEXT: cmpltss %xmm1, %xmm4 +; SSE-NEXT: movaps %xmm4, %xmm5 +; SSE-NEXT: andps %xmm3, %xmm4 +; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: ucomiss %xmm1, %xmm3 -; SSE-NEXT: cmovaeq %rcx, %rdx -; SSE-NEXT: movq %rdx, %xmm3 +; SSE-NEXT: subss %xmm1, %xmm3 +; SSE-NEXT: andnps %xmm3, %xmm5 +; SSE-NEXT: orps %xmm5, %xmm4 +; SSE-NEXT: cvttss2si %xmm4, %rcx +; SSE-NEXT: setae %al +; SSE-NEXT: shlq $63, %rax +; SSE-NEXT: xorq %rcx, %rax +; SSE-NEXT: movq %rax, %xmm3 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; SSE-NEXT: movaps %xmm0, %xmm3 ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1],xmm0[2,3] ; SSE-NEXT: movaps %xmm3, %xmm4 -; SSE-NEXT: subss %xmm1, %xmm4 -; SSE-NEXT: cvttss2si %xmm4, %rcx -; SSE-NEXT: xorq %rax, %rcx -; SSE-NEXT: cvttss2si %xmm3, %rdx +; SSE-NEXT: cmpltss %xmm1, %xmm4 +; SSE-NEXT: movaps %xmm4, %xmm5 +; SSE-NEXT: andps %xmm3, %xmm4 +; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: ucomiss %xmm1, %xmm3 -; SSE-NEXT: cmovaeq %rcx, %rdx -; SSE-NEXT: movq %rdx, %xmm3 +; SSE-NEXT: subss %xmm1, %xmm3 +; SSE-NEXT: andnps %xmm3, %xmm5 +; SSE-NEXT: orps %xmm5, %xmm4 +; SSE-NEXT: cvttss2si %xmm4, %rcx +; SSE-NEXT: setae %al +; SSE-NEXT: shlq $63, %rax +; SSE-NEXT: xorq %rcx, %rax +; SSE-NEXT: movq %rax, %xmm3 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] ; SSE-NEXT: movaps %xmm0, %xmm4 -; SSE-NEXT: subss %xmm1, %xmm4 -; SSE-NEXT: cvttss2si %xmm4, %rcx -; SSE-NEXT: xorq %rax, %rcx -; SSE-NEXT: cvttss2si %xmm0, %rax +; SSE-NEXT: cmpltss %xmm1, %xmm4 +; SSE-NEXT: movaps %xmm4, %xmm5 +; SSE-NEXT: andps %xmm0, %xmm4 +; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: ucomiss %xmm1, %xmm0 -; SSE-NEXT: cmovaeq %rcx, %rax +; SSE-NEXT: subss %xmm1, %xmm0 +; SSE-NEXT: andnps %xmm0, %xmm5 +; SSE-NEXT: orps %xmm5, %xmm4 +; SSE-NEXT: cvttss2si %xmm4, %rcx +; SSE-NEXT: setae %al +; SSE-NEXT: shlq $63, %rax +; SSE-NEXT: xorq %rcx, %rax ; SSE-NEXT: movq %rax, %xmm1 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] ; SSE-NEXT: movdqa %xmm2, %xmm0 @@ -2034,37 +2244,48 @@ ; AVX1: # %bb.0: ; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] ; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; AVX1-NEXT: vsubss %xmm1, %xmm2, %xmm3 +; AVX1-NEXT: vcmpltss %xmm1, %xmm2, %xmm3 +; AVX1-NEXT: vsubss %xmm1, %xmm2, %xmm4 +; AVX1-NEXT: vblendvps %xmm3, %xmm2, %xmm4, %xmm3 ; AVX1-NEXT: vcvttss2si %xmm3, %rax -; AVX1-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; AVX1-NEXT: xorq %rcx, %rax -; AVX1-NEXT: vcvttss2si %xmm2, %rdx +; AVX1-NEXT: xorl %ecx, %ecx ; AVX1-NEXT: vucomiss %xmm1, %xmm2 -; AVX1-NEXT: cmovaeq %rax, %rdx -; AVX1-NEXT: vmovq %rdx, %xmm2 +; AVX1-NEXT: setae %cl +; AVX1-NEXT: shlq $63, %rcx +; AVX1-NEXT: xorq %rax, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm2 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] -; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm4 +; AVX1-NEXT: vcmpltss %xmm1, %xmm3, %xmm4 +; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm5 +; AVX1-NEXT: vblendvps %xmm4, %xmm3, %xmm5, %xmm4 ; AVX1-NEXT: vcvttss2si %xmm4, %rax -; AVX1-NEXT: xorq %rcx, %rax -; AVX1-NEXT: vcvttss2si %xmm3, %rdx +; AVX1-NEXT: xorl %ecx, %ecx ; AVX1-NEXT: vucomiss %xmm1, %xmm3 -; AVX1-NEXT: cmovaeq %rax, %rdx -; AVX1-NEXT: vmovq %rdx, %xmm3 +; AVX1-NEXT: setae %cl +; AVX1-NEXT: shlq $63, %rcx +; AVX1-NEXT: xorq %rax, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm3 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] -; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm3 +; AVX1-NEXT: vcmpltss %xmm1, %xmm0, %xmm3 +; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm4 +; AVX1-NEXT: vblendvps %xmm3, %xmm0, %xmm4, %xmm3 ; AVX1-NEXT: vcvttss2si %xmm3, %rax -; AVX1-NEXT: xorq %rcx, %rax -; AVX1-NEXT: vcvttss2si %xmm0, %rdx +; AVX1-NEXT: xorl %ecx, %ecx ; AVX1-NEXT: vucomiss %xmm1, %xmm0 -; AVX1-NEXT: cmovaeq %rax, %rdx -; AVX1-NEXT: vmovq %rdx, %xmm3 +; AVX1-NEXT: setae %cl +; AVX1-NEXT: shlq $63, %rcx +; AVX1-NEXT: xorq %rax, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm3 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] -; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm4 +; AVX1-NEXT: vcmpltss %xmm1, %xmm0, %xmm4 +; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm5 +; AVX1-NEXT: vblendvps %xmm4, %xmm0, %xmm5, %xmm4 ; AVX1-NEXT: vcvttss2si %xmm4, %rax -; AVX1-NEXT: xorq %rcx, %rax -; AVX1-NEXT: vcvttss2si %xmm0, %rcx +; AVX1-NEXT: xorl %ecx, %ecx ; AVX1-NEXT: vucomiss %xmm1, %xmm0 -; AVX1-NEXT: cmovaeq %rax, %rcx +; AVX1-NEXT: setae %cl +; AVX1-NEXT: shlq $63, %rcx +; AVX1-NEXT: xorq %rax, %rcx ; AVX1-NEXT: vmovq %rcx, %xmm0 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0] ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -2074,37 +2295,48 @@ ; AVX2: # %bb.0: ; AVX2-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] ; AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm3 +; AVX2-NEXT: vcmpltss %xmm1, %xmm2, %xmm3 +; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm4 +; AVX2-NEXT: vblendvps %xmm3, %xmm2, %xmm4, %xmm3 ; AVX2-NEXT: vcvttss2si %xmm3, %rax -; AVX2-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; AVX2-NEXT: xorq %rcx, %rax -; AVX2-NEXT: vcvttss2si %xmm2, %rdx +; AVX2-NEXT: xorl %ecx, %ecx ; AVX2-NEXT: vucomiss %xmm1, %xmm2 -; AVX2-NEXT: cmovaeq %rax, %rdx -; AVX2-NEXT: vmovq %rdx, %xmm2 +; AVX2-NEXT: setae %cl +; AVX2-NEXT: shlq $63, %rcx +; AVX2-NEXT: xorq %rax, %rcx +; AVX2-NEXT: vmovq %rcx, %xmm2 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] -; AVX2-NEXT: vsubss %xmm1, %xmm3, %xmm4 +; AVX2-NEXT: vcmpltss %xmm1, %xmm3, %xmm4 +; AVX2-NEXT: vsubss %xmm1, %xmm3, %xmm5 +; AVX2-NEXT: vblendvps %xmm4, %xmm3, %xmm5, %xmm4 ; AVX2-NEXT: vcvttss2si %xmm4, %rax -; AVX2-NEXT: xorq %rcx, %rax -; AVX2-NEXT: vcvttss2si %xmm3, %rdx +; AVX2-NEXT: xorl %ecx, %ecx ; AVX2-NEXT: vucomiss %xmm1, %xmm3 -; AVX2-NEXT: cmovaeq %rax, %rdx -; AVX2-NEXT: vmovq %rdx, %xmm3 +; AVX2-NEXT: setae %cl +; AVX2-NEXT: shlq $63, %rcx +; AVX2-NEXT: xorq %rax, %rcx +; AVX2-NEXT: vmovq %rcx, %xmm3 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] -; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm3 +; AVX2-NEXT: vcmpltss %xmm1, %xmm0, %xmm3 +; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm4 +; AVX2-NEXT: vblendvps %xmm3, %xmm0, %xmm4, %xmm3 ; AVX2-NEXT: vcvttss2si %xmm3, %rax -; AVX2-NEXT: xorq %rcx, %rax -; AVX2-NEXT: vcvttss2si %xmm0, %rdx +; AVX2-NEXT: xorl %ecx, %ecx ; AVX2-NEXT: vucomiss %xmm1, %xmm0 -; AVX2-NEXT: cmovaeq %rax, %rdx -; AVX2-NEXT: vmovq %rdx, %xmm3 +; AVX2-NEXT: setae %cl +; AVX2-NEXT: shlq $63, %rcx +; AVX2-NEXT: xorq %rax, %rcx +; AVX2-NEXT: vmovq %rcx, %xmm3 ; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] -; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm4 +; AVX2-NEXT: vcmpltss %xmm1, %xmm0, %xmm4 +; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm5 +; AVX2-NEXT: vblendvps %xmm4, %xmm0, %xmm5, %xmm4 ; AVX2-NEXT: vcvttss2si %xmm4, %rax -; AVX2-NEXT: xorq %rcx, %rax -; AVX2-NEXT: vcvttss2si %xmm0, %rcx +; AVX2-NEXT: xorl %ecx, %ecx ; AVX2-NEXT: vucomiss %xmm1, %xmm0 -; AVX2-NEXT: cmovaeq %rax, %rcx +; AVX2-NEXT: setae %cl +; AVX2-NEXT: shlq $63, %rcx +; AVX2-NEXT: xorq %rax, %rcx ; AVX2-NEXT: vmovq %rcx, %xmm0 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0] ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 @@ -2190,42 +2422,66 @@ ; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE-NEXT: movaps %xmm0, %xmm2 ; SSE-NEXT: subss %xmm1, %xmm2 -; SSE-NEXT: cvttss2si %xmm2, %rcx -; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; SSE-NEXT: xorq %rax, %rcx -; SSE-NEXT: cvttss2si %xmm0, %rdx +; SSE-NEXT: movaps %xmm0, %xmm3 +; SSE-NEXT: cmpltss %xmm1, %xmm3 +; SSE-NEXT: movaps %xmm3, %xmm4 +; SSE-NEXT: andnps %xmm2, %xmm4 +; SSE-NEXT: andps %xmm0, %xmm3 +; SSE-NEXT: orps %xmm4, %xmm3 +; SSE-NEXT: cvttss2si %xmm3, %rax +; SSE-NEXT: xorl %ecx, %ecx ; SSE-NEXT: ucomiss %xmm1, %xmm0 -; SSE-NEXT: cmovaeq %rcx, %rdx -; SSE-NEXT: movq %rdx, %xmm2 +; SSE-NEXT: setae %cl +; SSE-NEXT: shlq $63, %rcx +; SSE-NEXT: xorq %rax, %rcx +; SSE-NEXT: movq %rcx, %xmm2 ; SSE-NEXT: movaps %xmm0, %xmm3 ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3] ; SSE-NEXT: movaps %xmm3, %xmm4 -; SSE-NEXT: subss %xmm1, %xmm4 -; SSE-NEXT: cvttss2si %xmm4, %rcx -; SSE-NEXT: xorq %rax, %rcx -; SSE-NEXT: cvttss2si %xmm3, %rdx +; SSE-NEXT: cmpltss %xmm1, %xmm4 +; SSE-NEXT: movaps %xmm4, %xmm5 +; SSE-NEXT: andps %xmm3, %xmm4 +; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: ucomiss %xmm1, %xmm3 -; SSE-NEXT: cmovaeq %rcx, %rdx -; SSE-NEXT: movq %rdx, %xmm3 +; SSE-NEXT: subss %xmm1, %xmm3 +; SSE-NEXT: andnps %xmm3, %xmm5 +; SSE-NEXT: orps %xmm5, %xmm4 +; SSE-NEXT: cvttss2si %xmm4, %rcx +; SSE-NEXT: setae %al +; SSE-NEXT: shlq $63, %rax +; SSE-NEXT: xorq %rcx, %rax +; SSE-NEXT: movq %rax, %xmm3 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; SSE-NEXT: movaps %xmm0, %xmm3 ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1],xmm0[2,3] ; SSE-NEXT: movaps %xmm3, %xmm4 -; SSE-NEXT: subss %xmm1, %xmm4 -; SSE-NEXT: cvttss2si %xmm4, %rcx -; SSE-NEXT: xorq %rax, %rcx -; SSE-NEXT: cvttss2si %xmm3, %rdx +; SSE-NEXT: cmpltss %xmm1, %xmm4 +; SSE-NEXT: movaps %xmm4, %xmm5 +; SSE-NEXT: andps %xmm3, %xmm4 +; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: ucomiss %xmm1, %xmm3 -; SSE-NEXT: cmovaeq %rcx, %rdx -; SSE-NEXT: movq %rdx, %xmm3 +; SSE-NEXT: subss %xmm1, %xmm3 +; SSE-NEXT: andnps %xmm3, %xmm5 +; SSE-NEXT: orps %xmm5, %xmm4 +; SSE-NEXT: cvttss2si %xmm4, %rcx +; SSE-NEXT: setae %al +; SSE-NEXT: shlq $63, %rax +; SSE-NEXT: xorq %rcx, %rax +; SSE-NEXT: movq %rax, %xmm3 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] ; SSE-NEXT: movaps %xmm0, %xmm4 -; SSE-NEXT: subss %xmm1, %xmm4 -; SSE-NEXT: cvttss2si %xmm4, %rcx -; SSE-NEXT: xorq %rax, %rcx -; SSE-NEXT: cvttss2si %xmm0, %rax +; SSE-NEXT: cmpltss %xmm1, %xmm4 +; SSE-NEXT: movaps %xmm4, %xmm5 +; SSE-NEXT: andps %xmm0, %xmm4 +; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: ucomiss %xmm1, %xmm0 -; SSE-NEXT: cmovaeq %rcx, %rax +; SSE-NEXT: subss %xmm1, %xmm0 +; SSE-NEXT: andnps %xmm0, %xmm5 +; SSE-NEXT: orps %xmm5, %xmm4 +; SSE-NEXT: cvttss2si %xmm4, %rcx +; SSE-NEXT: setae %al +; SSE-NEXT: shlq $63, %rax +; SSE-NEXT: xorq %rcx, %rax ; SSE-NEXT: movq %rax, %xmm1 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] ; SSE-NEXT: movdqa %xmm2, %xmm0 @@ -2235,37 +2491,48 @@ ; AVX1: # %bb.0: ; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] ; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; AVX1-NEXT: vsubss %xmm1, %xmm2, %xmm3 +; AVX1-NEXT: vcmpltss %xmm1, %xmm2, %xmm3 +; AVX1-NEXT: vsubss %xmm1, %xmm2, %xmm4 +; AVX1-NEXT: vblendvps %xmm3, %xmm2, %xmm4, %xmm3 ; AVX1-NEXT: vcvttss2si %xmm3, %rax -; AVX1-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; AVX1-NEXT: xorq %rcx, %rax -; AVX1-NEXT: vcvttss2si %xmm2, %rdx +; AVX1-NEXT: xorl %ecx, %ecx ; AVX1-NEXT: vucomiss %xmm1, %xmm2 -; AVX1-NEXT: cmovaeq %rax, %rdx -; AVX1-NEXT: vmovq %rdx, %xmm2 +; AVX1-NEXT: setae %cl +; AVX1-NEXT: shlq $63, %rcx +; AVX1-NEXT: xorq %rax, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm2 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] -; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm4 +; AVX1-NEXT: vcmpltss %xmm1, %xmm3, %xmm4 +; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm5 +; AVX1-NEXT: vblendvps %xmm4, %xmm3, %xmm5, %xmm4 ; AVX1-NEXT: vcvttss2si %xmm4, %rax -; AVX1-NEXT: xorq %rcx, %rax -; AVX1-NEXT: vcvttss2si %xmm3, %rdx +; AVX1-NEXT: xorl %ecx, %ecx ; AVX1-NEXT: vucomiss %xmm1, %xmm3 -; AVX1-NEXT: cmovaeq %rax, %rdx -; AVX1-NEXT: vmovq %rdx, %xmm3 +; AVX1-NEXT: setae %cl +; AVX1-NEXT: shlq $63, %rcx +; AVX1-NEXT: xorq %rax, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm3 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] -; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm3 +; AVX1-NEXT: vcmpltss %xmm1, %xmm0, %xmm3 +; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm4 +; AVX1-NEXT: vblendvps %xmm3, %xmm0, %xmm4, %xmm3 ; AVX1-NEXT: vcvttss2si %xmm3, %rax -; AVX1-NEXT: xorq %rcx, %rax -; AVX1-NEXT: vcvttss2si %xmm0, %rdx +; AVX1-NEXT: xorl %ecx, %ecx ; AVX1-NEXT: vucomiss %xmm1, %xmm0 -; AVX1-NEXT: cmovaeq %rax, %rdx -; AVX1-NEXT: vmovq %rdx, %xmm3 +; AVX1-NEXT: setae %cl +; AVX1-NEXT: shlq $63, %rcx +; AVX1-NEXT: xorq %rax, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm3 ; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] -; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm4 +; AVX1-NEXT: vcmpltss %xmm1, %xmm0, %xmm4 +; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm5 +; AVX1-NEXT: vblendvps %xmm4, %xmm0, %xmm5, %xmm4 ; AVX1-NEXT: vcvttss2si %xmm4, %rax -; AVX1-NEXT: xorq %rcx, %rax -; AVX1-NEXT: vcvttss2si %xmm0, %rcx +; AVX1-NEXT: xorl %ecx, %ecx ; AVX1-NEXT: vucomiss %xmm1, %xmm0 -; AVX1-NEXT: cmovaeq %rax, %rcx +; AVX1-NEXT: setae %cl +; AVX1-NEXT: shlq $63, %rcx +; AVX1-NEXT: xorq %rax, %rcx ; AVX1-NEXT: vmovq %rcx, %xmm0 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0] ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -2275,37 +2542,48 @@ ; AVX2: # %bb.0: ; AVX2-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] ; AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm3 +; AVX2-NEXT: vcmpltss %xmm1, %xmm2, %xmm3 +; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm4 +; AVX2-NEXT: vblendvps %xmm3, %xmm2, %xmm4, %xmm3 ; AVX2-NEXT: vcvttss2si %xmm3, %rax -; AVX2-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 -; AVX2-NEXT: xorq %rcx, %rax -; AVX2-NEXT: vcvttss2si %xmm2, %rdx +; AVX2-NEXT: xorl %ecx, %ecx ; AVX2-NEXT: vucomiss %xmm1, %xmm2 -; AVX2-NEXT: cmovaeq %rax, %rdx -; AVX2-NEXT: vmovq %rdx, %xmm2 +; AVX2-NEXT: setae %cl +; AVX2-NEXT: shlq $63, %rcx +; AVX2-NEXT: xorq %rax, %rcx +; AVX2-NEXT: vmovq %rcx, %xmm2 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] -; AVX2-NEXT: vsubss %xmm1, %xmm3, %xmm4 +; AVX2-NEXT: vcmpltss %xmm1, %xmm3, %xmm4 +; AVX2-NEXT: vsubss %xmm1, %xmm3, %xmm5 +; AVX2-NEXT: vblendvps %xmm4, %xmm3, %xmm5, %xmm4 ; AVX2-NEXT: vcvttss2si %xmm4, %rax -; AVX2-NEXT: xorq %rcx, %rax -; AVX2-NEXT: vcvttss2si %xmm3, %rdx +; AVX2-NEXT: xorl %ecx, %ecx ; AVX2-NEXT: vucomiss %xmm1, %xmm3 -; AVX2-NEXT: cmovaeq %rax, %rdx -; AVX2-NEXT: vmovq %rdx, %xmm3 +; AVX2-NEXT: setae %cl +; AVX2-NEXT: shlq $63, %rcx +; AVX2-NEXT: xorq %rax, %rcx +; AVX2-NEXT: vmovq %rcx, %xmm3 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] -; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm3 +; AVX2-NEXT: vcmpltss %xmm1, %xmm0, %xmm3 +; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm4 +; AVX2-NEXT: vblendvps %xmm3, %xmm0, %xmm4, %xmm3 ; AVX2-NEXT: vcvttss2si %xmm3, %rax -; AVX2-NEXT: xorq %rcx, %rax -; AVX2-NEXT: vcvttss2si %xmm0, %rdx +; AVX2-NEXT: xorl %ecx, %ecx ; AVX2-NEXT: vucomiss %xmm1, %xmm0 -; AVX2-NEXT: cmovaeq %rax, %rdx -; AVX2-NEXT: vmovq %rdx, %xmm3 +; AVX2-NEXT: setae %cl +; AVX2-NEXT: shlq $63, %rcx +; AVX2-NEXT: xorq %rax, %rcx +; AVX2-NEXT: vmovq %rcx, %xmm3 ; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] -; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm4 +; AVX2-NEXT: vcmpltss %xmm1, %xmm0, %xmm4 +; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm5 +; AVX2-NEXT: vblendvps %xmm4, %xmm0, %xmm5, %xmm4 ; AVX2-NEXT: vcvttss2si %xmm4, %rax -; AVX2-NEXT: xorq %rcx, %rax -; AVX2-NEXT: vcvttss2si %xmm0, %rcx +; AVX2-NEXT: xorl %ecx, %ecx ; AVX2-NEXT: vucomiss %xmm1, %xmm0 -; AVX2-NEXT: cmovaeq %rax, %rcx +; AVX2-NEXT: setae %cl +; AVX2-NEXT: shlq $63, %rcx +; AVX2-NEXT: xorq %rax, %rcx ; AVX2-NEXT: vmovq %rcx, %xmm0 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0] ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0