Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -5703,11 +5703,23 @@ // If the maximum float value is smaller then the signed integer range, // the destination signmask can't be represented by the float, so we can // just use FP_TO_SINT directly. + bool KnownInRange = false; + SDValue Cst, Sel; const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT); APFloat APF(APFSem, APInt::getNullValue(SrcVT.getScalarSizeInBits())); APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits()); if (APFloat::opOverflow & - APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) { + APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) + KnownInRange = true; + else { + // Likewise if we prove at compile time that the argument is in range. + Cst = DAG.getConstantFP(APF, dl, SrcVT); + Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT); + if (auto *SelC = dyn_cast<ConstantSDNode>(Sel)) + if (!SelC->isNullValue()) + KnownInRange = true; + } + if (KnownInRange) { if (Node->isStrictFPOpcode()) { Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other }, { Node->getOperand(0), Src }); @@ -5717,9 +5729,6 @@ return true; } - SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT); - SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT); - bool Strict = Node->isStrictFPOpcode() || shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false); @@ -5727,28 +5736,28 @@ // Expand based on maximum range of FP_TO_SINT, if the value exceeds the // signmask then offset (the result of which should be fully representable). 
// Sel = Src < 0x8000000000000000 - // Val = select Sel, Src, Src - 0x8000000000000000 - // Ofs = select Sel, 0, 0x8000000000000000 - // Result = fp_to_sint(Val) ^ Ofs + // FltOfs = select Sel, 0, 0x8000000000000000 + // IntOfs = select Sel, 0, 0x8000000000000000 + // Result = fp_to_sint(Src - FltOfs) ^ IntOfs // TODO: Should any fast-math-flags be set for the FSUB? - SDValue SrcBiased; - if (Node->isStrictFPOpcode()) - SrcBiased = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other }, - { Node->getOperand(0), Src, Cst }); - else - SrcBiased = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst); - SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src, SrcBiased); - SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), - DAG.getConstant(SignMask, dl, DstVT)); + SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel, + DAG.getConstantFP(0.0, dl, SrcVT), Cst); + SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel, + DAG.getConstant(0, dl, DstVT), + DAG.getConstant(SignMask, dl, DstVT)); SDValue SInt; if (Node->isStrictFPOpcode()) { + SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other }, + { Node->getOperand(0), Src, FltOfs }); SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other }, - { SrcBiased.getValue(1), Val }); + { Val.getValue(1), Val }); Chain = SInt.getValue(1); - } else + } else { + SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs); SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val); - Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, Ofs); + } + Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs); } else { // Expand based on maximum range of FP_TO_SINT: // True = fp_to_sint(Src) Index: test/CodeGen/SystemZ/fp-strict-conv-10.ll =================================================================== --- test/CodeGen/SystemZ/fp-strict-conv-10.ll +++ test/CodeGen/SystemZ/fp-strict-conv-10.ll @@ -18,19 +18,18 @@ ; CHECK-LABEL: f1: ; CHECK: # %bb.0: ; CHECK-NEXT: larl %r1, .LCPI0_0 -; CHECK-NEXT: le %f2, 0(%r1) -; CHECK-NEXT: ler 
%f1, %f0 -; CHECK-NEXT: sebr %f1, %f2 -; CHECK-NEXT: cebr %f0, %f2 +; CHECK-NEXT: le %f1, 0(%r1) +; CHECK-NEXT: cebr %f0, %f1 +; CHECK-NEXT: lhi %r0, 0 ; CHECK-NEXT: jl .LBB0_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: ler %f0, %f1 +; CHECK-NEXT: llilh %r0, 32768 ; CHECK-NEXT: .LBB0_2: -; CHECK-NEXT: lhi %r0, 0 -; CHECK-NEXT: jl .LBB0_4 +; CHECK-NEXT: jnl .LBB0_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: llilh %r0, 32768 +; CHECK-NEXT: lzer %f1 ; CHECK-NEXT: .LBB0_4: +; CHECK-NEXT: sebr %f0, %f1 ; CHECK-NEXT: cfebr %r2, 5, %f0 ; CHECK-NEXT: xr %r2, %r0 ; CHECK-NEXT: br %r14 @@ -44,19 +43,18 @@ ; CHECK-LABEL: f2: ; CHECK: # %bb.0: ; CHECK-NEXT: larl %r1, .LCPI1_0 -; CHECK-NEXT: ldeb %f2, 0(%r1) -; CHECK-NEXT: ldr %f1, %f0 -; CHECK-NEXT: sdbr %f1, %f2 -; CHECK-NEXT: cdbr %f0, %f2 +; CHECK-NEXT: ldeb %f1, 0(%r1) +; CHECK-NEXT: cdbr %f0, %f1 +; CHECK-NEXT: lhi %r0, 0 ; CHECK-NEXT: jl .LBB1_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: ldr %f0, %f1 +; CHECK-NEXT: llilh %r0, 32768 ; CHECK-NEXT: .LBB1_2: -; CHECK-NEXT: lhi %r0, 0 -; CHECK-NEXT: jl .LBB1_4 +; CHECK-NEXT: jnl .LBB1_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: llilh %r0, 32768 +; CHECK-NEXT: lzdr %f1 ; CHECK-NEXT: .LBB1_4: +; CHECK-NEXT: sdbr %f0, %f1 ; CHECK-NEXT: cfdbr %r2, 5, %f0 ; CHECK-NEXT: xr %r2, %r0 ; CHECK-NEXT: br %r14 @@ -72,19 +70,18 @@ ; CHECK-NEXT: ld %f0, 0(%r2) ; CHECK-NEXT: ld %f2, 8(%r2) ; CHECK-NEXT: larl %r1, .LCPI2_0 -; CHECK-NEXT: lxeb %f4, 0(%r1) -; CHECK-NEXT: lxr %f1, %f0 -; CHECK-NEXT: sxbr %f1, %f4 -; CHECK-NEXT: cxbr %f0, %f4 +; CHECK-NEXT: lxeb %f1, 0(%r1) +; CHECK-NEXT: cxbr %f0, %f1 +; CHECK-NEXT: lhi %r0, 0 ; CHECK-NEXT: jl .LBB2_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: lxr %f0, %f1 +; CHECK-NEXT: llilh %r0, 32768 ; CHECK-NEXT: .LBB2_2: -; CHECK-NEXT: lhi %r0, 0 -; CHECK-NEXT: jl .LBB2_4 +; CHECK-NEXT: jnl .LBB2_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: llilh %r0, 32768 +; CHECK-NEXT: lzxr %f1 ; CHECK-NEXT: .LBB2_4: +; CHECK-NEXT: sxbr %f0, %f1 ; CHECK-NEXT: cfxbr %r2, 5, %f0 ; CHECK-NEXT: xr %r2, 
%r0 ; CHECK-NEXT: br %r14 Index: test/CodeGen/SystemZ/fp-strict-conv-12.ll =================================================================== --- test/CodeGen/SystemZ/fp-strict-conv-12.ll +++ test/CodeGen/SystemZ/fp-strict-conv-12.ll @@ -17,19 +17,18 @@ ; CHECK-LABEL: f1: ; CHECK: # %bb.0: ; CHECK-NEXT: larl %r1, .LCPI0_0 -; CHECK-NEXT: le %f2, 0(%r1) -; CHECK-NEXT: ler %f1, %f0 -; CHECK-NEXT: sebr %f1, %f2 -; CHECK-NEXT: cebr %f0, %f2 +; CHECK-NEXT: le %f1, 0(%r1) +; CHECK-NEXT: cebr %f0, %f1 +; CHECK-NEXT: lghi %r0, 0 ; CHECK-NEXT: jl .LBB0_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: ler %f0, %f1 +; CHECK-NEXT: llihh %r0, 32768 ; CHECK-NEXT: .LBB0_2: -; CHECK-NEXT: lghi %r0, 0 -; CHECK-NEXT: jl .LBB0_4 +; CHECK-NEXT: jnl .LBB0_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: llihh %r0, 32768 +; CHECK-NEXT: lzer %f1 ; CHECK-NEXT: .LBB0_4: +; CHECK-NEXT: sebr %f0, %f1 ; CHECK-NEXT: cgebr %r2, 5, %f0 ; CHECK-NEXT: xgr %r2, %r0 ; CHECK-NEXT: br %r14 @@ -43,19 +42,18 @@ ; CHECK-LABEL: f2: ; CHECK: # %bb.0: ; CHECK-NEXT: larl %r1, .LCPI1_0 -; CHECK-NEXT: ldeb %f2, 0(%r1) -; CHECK-NEXT: ldr %f1, %f0 -; CHECK-NEXT: sdbr %f1, %f2 -; CHECK-NEXT: cdbr %f0, %f2 +; CHECK-NEXT: ldeb %f1, 0(%r1) +; CHECK-NEXT: cdbr %f0, %f1 +; CHECK-NEXT: lghi %r0, 0 ; CHECK-NEXT: jl .LBB1_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: ldr %f0, %f1 +; CHECK-NEXT: llihh %r0, 32768 ; CHECK-NEXT: .LBB1_2: -; CHECK-NEXT: lghi %r0, 0 -; CHECK-NEXT: jl .LBB1_4 +; CHECK-NEXT: jnl .LBB1_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: llihh %r0, 32768 +; CHECK-NEXT: lzdr %f1 ; CHECK-NEXT: .LBB1_4: +; CHECK-NEXT: sdbr %f0, %f1 ; CHECK-NEXT: cgdbr %r2, 5, %f0 ; CHECK-NEXT: xgr %r2, %r0 ; CHECK-NEXT: br %r14 @@ -71,19 +69,18 @@ ; CHECK-NEXT: ld %f0, 0(%r2) ; CHECK-NEXT: ld %f2, 8(%r2) ; CHECK-NEXT: larl %r1, .LCPI2_0 -; CHECK-NEXT: lxeb %f4, 0(%r1) -; CHECK-NEXT: lxr %f1, %f0 -; CHECK-NEXT: sxbr %f1, %f4 -; CHECK-NEXT: cxbr %f0, %f4 +; CHECK-NEXT: lxeb %f1, 0(%r1) +; CHECK-NEXT: cxbr %f0, %f1 +; CHECK-NEXT: lghi %r0, 0 ; CHECK-NEXT: 
jl .LBB2_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: lxr %f0, %f1 +; CHECK-NEXT: llihh %r0, 32768 ; CHECK-NEXT: .LBB2_2: -; CHECK-NEXT: lghi %r0, 0 -; CHECK-NEXT: jl .LBB2_4 +; CHECK-NEXT: jnl .LBB2_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: llihh %r0, 32768 +; CHECK-NEXT: lzxr %f1 ; CHECK-NEXT: .LBB2_4: +; CHECK-NEXT: sxbr %f0, %f1 ; CHECK-NEXT: cgxbr %r2, 5, %f0 ; CHECK-NEXT: xgr %r2, %r0 ; CHECK-NEXT: br %r14 Index: test/CodeGen/X86/fp-intrinsics.ll =================================================================== --- test/CodeGen/X86/fp-intrinsics.ll +++ test/CodeGen/X86/fp-intrinsics.ll @@ -290,27 +290,23 @@ ; unknown. The expansion should have only one conversion instruction. ; Verify that no gross errors happen. ; CHECK-LABEL: @f20u -; NO-FMA: cmpltsd -; NO-FMA: movapd -; NO-FMA: andpd -; NO-FMA: xorl ; NO-FMA: ucomisd -; NO-FMA: subsd -; NO-FMA: andnpd -; NO-FMA: orpd -; NO-FMA: cvttsd2si ; NO-FMA: setae ; NO-FMA: shll +; NO-FMA: movapd +; NO-FMA: cmpltsd +; NO-FMA: andnpd +; NO-FMA: subsd +; NO-FMA: cvttsd2si ; NO-FMA: xorl ; -; HAS-FMA: vcmpltsd -; HAS-FMA: vsubsd -; HAS-FMA: vblendvpd -; HAS-FMA: vcvttsd2si -; HAS-FMA: xorl ; HAS-FMA: vucomisd ; HAS-FMA: setae ; HAS-FMA: shll +; HAS-FMA: vcmpltsd +; HAS-FMA: vandnpd +; HAS-FMA: vsubsd +; HAS-FMA: vcvttsd2si ; HAS-FMA: xorl define i32 @f20u(double %x) { entry: Index: test/CodeGen/X86/vector-constrained-fp-intrinsics.ll =================================================================== --- test/CodeGen/X86/vector-constrained-fp-intrinsics.ll +++ test/CodeGen/X86/vector-constrained-fp-intrinsics.ll @@ -4323,8 +4323,8 @@ ; AVX-LABEL: constrained_vector_fptoui_v2i32_v2f32: ; AVX: # %bb.0: # %entry ; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax -; AVX-NEXT: vcvttss2si {{.*}}(%rip), %ecx -; AVX-NEXT: vmovd %ecx, %xmm0 +; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax ; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ; AVX-NEXT: retq entry: @@ -4350,8 +4350,8 @@ ; AVX-LABEL: 
constrained_vector_fptoui_v3i32_v3f32: ; AVX: # %bb.0: # %entry ; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax -; AVX-NEXT: vcvttss2si {{.*}}(%rip), %ecx -; AVX-NEXT: vmovd %ecx, %xmm0 +; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax ; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax ; AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 @@ -4383,8 +4383,8 @@ ; AVX-LABEL: constrained_vector_fptoui_v4i32_v4f32: ; AVX: # %bb.0: # %entry ; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax -; AVX-NEXT: vcvttss2si {{.*}}(%rip), %ecx -; AVX-NEXT: vmovd %ecx, %xmm0 +; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax ; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ; AVX-NEXT: vcvttss2si {{.*}}(%rip), %eax ; AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 @@ -4535,8 +4535,8 @@ ; AVX-LABEL: constrained_vector_fptoui_v2i32_v2f64: ; AVX: # %bb.0: # %entry ; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax -; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %ecx -; AVX-NEXT: vmovd %ecx, %xmm0 +; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax ; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ; AVX-NEXT: retq entry: @@ -4562,8 +4562,8 @@ ; AVX-LABEL: constrained_vector_fptoui_v3i32_v3f64: ; AVX: # %bb.0: # %entry ; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax -; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %ecx -; AVX-NEXT: vmovd %ecx, %xmm0 +; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax ; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax ; AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 @@ -4595,8 +4595,8 @@ ; AVX-LABEL: constrained_vector_fptoui_v4i32_v4f64: ; AVX: # %bb.0: # %entry ; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax -; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %ecx -; AVX-NEXT: vmovd %ecx, %xmm0 +; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax ; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ; AVX-NEXT: vcvttsd2si {{.*}}(%rip), %eax ; AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0