Skip to content

Commit 0add090

Browse files
committed Dec 4, 2018
[TargetLowering] expandFP_TO_UINT - avoid FPE due to out of range conversion (PR17686)
PR17686 demonstrates that for some targets FP exceptions can fire in cases where the FP_TO_UINT is expanded using a FP_TO_SINT instruction. The existing code converts both the in-range and out-of-range cases using FP_TO_SINT and then selects the result; this patch changes this for 'strict' cases to pre-select the FP_TO_SINT input and the offset adjustment. The X87 cases don't need the strict flag but generate much nicer code with it.... Differential Revision: https://reviews.llvm.org/D53794 llvm-svn: 348251
1 parent eecf487 commit 0add090

File tree

6 files changed

+118
-130
lines changed

6 files changed

+118
-130
lines changed
 

‎llvm/include/llvm/CodeGen/TargetLowering.h

+10
Original file line numberDiff line numberDiff line change
@@ -1746,6 +1746,16 @@ class TargetLoweringBase {
17461746
return false;
17471747
}
17481748

1749+
/// Return true if it is more correct/profitable to use strict FP_TO_INT
1750+
/// conversion operations - canonicalizing the FP source value (\p FpVT)
1751+
/// before a single conversion to \p IntVT, instead of converting all cases
1752+
/// and then selecting based on value. This may be true if the target throws
1753+
/// exceptions for out of bounds conversions or has fast FP CMOV.
1754+
virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
1755+
bool IsSigned) const {
1756+
return false;
1757+
}
1758+
17491759
//===--------------------------------------------------------------------===//
17501760
// TargetLowering Configuration Methods - These methods should be invoked by
17511761
// the derived class constructor to configure this object for the target.

‎llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

+30-11
Original file line numberDiff line numberDiff line change
@@ -4200,20 +4200,39 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
42004200
return true;
42014201
}
42024202

4203-
// Expand based on maximum range of FP_TO_SINT:
4204-
// True = fp_to_sint(Src)
4205-
// False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
4206-
// Result = select (Src < 0x8000000000000000), True, False
42074203
SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
42084204
SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
42094205

4210-
SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
4211-
// TODO: Should any fast-math-flags be set for the FSUB?
4212-
SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
4213-
DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
4214-
False = DAG.getNode(ISD::XOR, dl, DstVT, False,
4215-
DAG.getConstant(SignMask, dl, DstVT));
4216-
Result = DAG.getSelect(dl, DstVT, Sel, True, False);
4206+
bool Strict = shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
4207+
if (Strict) {
4208+
// Expand based on maximum range of FP_TO_SINT, if the value exceeds the
4209+
// signmask then offset (the result of which should be fully representable).
4210+
// Sel = Src < 0x8000000000000000
4211+
// Val = select Sel, Src, Src - 0x8000000000000000
4212+
// Ofs = select Sel, 0, 0x8000000000000000
4213+
// Result = fp_to_sint(Val) ^ Ofs
4214+
4215+
// TODO: Should any fast-math-flags be set for the FSUB?
4216+
SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src,
4217+
DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
4218+
SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT),
4219+
DAG.getConstant(SignMask, dl, DstVT));
4220+
Result = DAG.getNode(ISD::XOR, dl, DstVT,
4221+
DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val), Ofs);
4222+
} else {
4223+
// Expand based on maximum range of FP_TO_SINT:
4224+
// True = fp_to_sint(Src)
4225+
// False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
4226+
// Result = select (Src < 0x8000000000000000), True, False
4227+
4228+
SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
4229+
// TODO: Should any fast-math-flags be set for the FSUB?
4230+
SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
4231+
DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
4232+
False = DAG.getNode(ISD::XOR, dl, DstVT, False,
4233+
DAG.getConstant(SignMask, dl, DstVT));
4234+
Result = DAG.getSelect(dl, DstVT, Sel, True, False);
4235+
}
42174236
return true;
42184237
}
42194238

‎llvm/lib/Target/X86/X86ISelLowering.cpp

+6
Original file line numberDiff line numberDiff line change
@@ -4812,6 +4812,12 @@ bool X86TargetLowering::decomposeMulByConstant(EVT VT, SDValue C) const {
48124812
(1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
48134813
}
48144814

4815+
bool X86TargetLowering::shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
4816+
bool IsSigned) const {
4817+
// f80 FP_TO_UINT is more efficient using Strict code if FCMOV is available.
4818+
return !IsSigned && FpVT == MVT::f80 && Subtarget.hasCMov();
4819+
}
4820+
48154821
bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
48164822
unsigned Index) const {
48174823
if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))

‎llvm/lib/Target/X86/X86ISelLowering.h

+3
Original file line numberDiff line numberDiff line change
@@ -1047,6 +1047,9 @@ namespace llvm {
10471047

10481048
bool decomposeMulByConstant(EVT VT, SDValue C) const override;
10491049

1050+
bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
1051+
bool IsSigned) const override;
1052+
10501053
/// Return true if EXTRACT_SUBVECTOR is cheap for this result type
10511054
/// with this index.
10521055
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,

‎llvm/test/CodeGen/X86/fp-cvt.ll

+32-56
Original file line numberDiff line numberDiff line change
@@ -483,29 +483,20 @@ define i64 @fptoui_i64_fp80(x86_fp80 %a0) nounwind {
483483
; X64-X87-NEXT: flds {{.*}}(%rip)
484484
; X64-X87-NEXT: fld %st(1)
485485
; X64-X87-NEXT: fsub %st(1)
486+
; X64-X87-NEXT: xorl %eax, %eax
487+
; X64-X87-NEXT: fxch %st(1)
488+
; X64-X87-NEXT: fucompi %st(2)
489+
; X64-X87-NEXT: fcmovnbe %st(1), %st(0)
490+
; X64-X87-NEXT: fstp %st(1)
486491
; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp)
487-
; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
488-
; X64-X87-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F
489-
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
490-
; X64-X87-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
491-
; X64-X87-NEXT: fistpll -{{[0-9]+}}(%rsp)
492-
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
493-
; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp)
494-
; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
492+
; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
495493
; X64-X87-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F
496494
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
497-
; X64-X87-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
498-
; X64-X87-NEXT: fld %st(1)
495+
; X64-X87-NEXT: movw %cx, -{{[0-9]+}}(%rsp)
499496
; X64-X87-NEXT: fistpll -{{[0-9]+}}(%rsp)
500497
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
501-
; X64-X87-NEXT: fucompi %st(1)
502-
; X64-X87-NEXT: fstp %st(0)
503-
; X64-X87-NEXT: jbe .LBB10_1
504-
; X64-X87-NEXT: # %bb.2:
505-
; X64-X87-NEXT: movq -{{[0-9]+}}(%rsp), %rax
506-
; X64-X87-NEXT: retq
507-
; X64-X87-NEXT: .LBB10_1:
508-
; X64-X87-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
498+
; X64-X87-NEXT: setbe %al
499+
; X64-X87-NEXT: shlq $63, %rax
509500
; X64-X87-NEXT: xorq -{{[0-9]+}}(%rsp), %rax
510501
; X64-X87-NEXT: retq
511502
;
@@ -515,17 +506,14 @@ define i64 @fptoui_i64_fp80(x86_fp80 %a0) nounwind {
515506
; X64-SSSE3-NEXT: flds {{.*}}(%rip)
516507
; X64-SSSE3-NEXT: fld %st(1)
517508
; X64-SSSE3-NEXT: fsub %st(1)
509+
; X64-SSSE3-NEXT: xorl %eax, %eax
510+
; X64-SSSE3-NEXT: fxch %st(1)
511+
; X64-SSSE3-NEXT: fucompi %st(2)
512+
; X64-SSSE3-NEXT: fcmovnbe %st(1), %st(0)
513+
; X64-SSSE3-NEXT: fstp %st(1)
518514
; X64-SSSE3-NEXT: fisttpll -{{[0-9]+}}(%rsp)
519-
; X64-SSSE3-NEXT: fld %st(1)
520-
; X64-SSSE3-NEXT: fisttpll -{{[0-9]+}}(%rsp)
521-
; X64-SSSE3-NEXT: fucompi %st(1)
522-
; X64-SSSE3-NEXT: fstp %st(0)
523-
; X64-SSSE3-NEXT: jbe .LBB10_1
524-
; X64-SSSE3-NEXT: # %bb.2:
525-
; X64-SSSE3-NEXT: movq -{{[0-9]+}}(%rsp), %rax
526-
; X64-SSSE3-NEXT: retq
527-
; X64-SSSE3-NEXT: .LBB10_1:
528-
; X64-SSSE3-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
515+
; X64-SSSE3-NEXT: setbe %al
516+
; X64-SSSE3-NEXT: shlq $63, %rax
529517
; X64-SSSE3-NEXT: xorq -{{[0-9]+}}(%rsp), %rax
530518
; X64-SSSE3-NEXT: retq
531519
%1 = fptoui x86_fp80 %a0 to i64
@@ -577,29 +565,20 @@ define i64 @fptoui_i64_fp80_ld(x86_fp80 *%a0) nounwind {
577565
; X64-X87-NEXT: flds {{.*}}(%rip)
578566
; X64-X87-NEXT: fld %st(1)
579567
; X64-X87-NEXT: fsub %st(1)
568+
; X64-X87-NEXT: xorl %eax, %eax
569+
; X64-X87-NEXT: fxch %st(1)
570+
; X64-X87-NEXT: fucompi %st(2)
571+
; X64-X87-NEXT: fcmovnbe %st(1), %st(0)
572+
; X64-X87-NEXT: fstp %st(1)
580573
; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp)
581-
; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
582-
; X64-X87-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F
583-
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
584-
; X64-X87-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
585-
; X64-X87-NEXT: fistpll -{{[0-9]+}}(%rsp)
586-
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
587-
; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp)
588-
; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
574+
; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
589575
; X64-X87-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F
590576
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
591-
; X64-X87-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
592-
; X64-X87-NEXT: fld %st(1)
577+
; X64-X87-NEXT: movw %cx, -{{[0-9]+}}(%rsp)
593578
; X64-X87-NEXT: fistpll -{{[0-9]+}}(%rsp)
594579
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
595-
; X64-X87-NEXT: fucompi %st(1)
596-
; X64-X87-NEXT: fstp %st(0)
597-
; X64-X87-NEXT: jbe .LBB11_1
598-
; X64-X87-NEXT: # %bb.2:
599-
; X64-X87-NEXT: movq -{{[0-9]+}}(%rsp), %rax
600-
; X64-X87-NEXT: retq
601-
; X64-X87-NEXT: .LBB11_1:
602-
; X64-X87-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
580+
; X64-X87-NEXT: setbe %al
581+
; X64-X87-NEXT: shlq $63, %rax
603582
; X64-X87-NEXT: xorq -{{[0-9]+}}(%rsp), %rax
604583
; X64-X87-NEXT: retq
605584
;
@@ -609,17 +588,14 @@ define i64 @fptoui_i64_fp80_ld(x86_fp80 *%a0) nounwind {
609588
; X64-SSSE3-NEXT: flds {{.*}}(%rip)
610589
; X64-SSSE3-NEXT: fld %st(1)
611590
; X64-SSSE3-NEXT: fsub %st(1)
591+
; X64-SSSE3-NEXT: xorl %eax, %eax
592+
; X64-SSSE3-NEXT: fxch %st(1)
593+
; X64-SSSE3-NEXT: fucompi %st(2)
594+
; X64-SSSE3-NEXT: fcmovnbe %st(1), %st(0)
595+
; X64-SSSE3-NEXT: fstp %st(1)
612596
; X64-SSSE3-NEXT: fisttpll -{{[0-9]+}}(%rsp)
613-
; X64-SSSE3-NEXT: fld %st(1)
614-
; X64-SSSE3-NEXT: fisttpll -{{[0-9]+}}(%rsp)
615-
; X64-SSSE3-NEXT: fucompi %st(1)
616-
; X64-SSSE3-NEXT: fstp %st(0)
617-
; X64-SSSE3-NEXT: jbe .LBB11_1
618-
; X64-SSSE3-NEXT: # %bb.2:
619-
; X64-SSSE3-NEXT: movq -{{[0-9]+}}(%rsp), %rax
620-
; X64-SSSE3-NEXT: retq
621-
; X64-SSSE3-NEXT: .LBB11_1:
622-
; X64-SSSE3-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
597+
; X64-SSSE3-NEXT: setbe %al
598+
; X64-SSSE3-NEXT: shlq $63, %rax
623599
; X64-SSSE3-NEXT: xorq -{{[0-9]+}}(%rsp), %rax
624600
; X64-SSSE3-NEXT: retq
625601
%1 = load x86_fp80, x86_fp80 *%a0

‎llvm/test/CodeGen/X86/scalar-fp-to-i64.ll

+37-63
Original file line numberDiff line numberDiff line change
@@ -1147,25 +1147,21 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
11471147
;
11481148
; SSE3_64_WIN-LABEL: x_to_u64:
11491149
; SSE3_64_WIN: # %bb.0:
1150-
; SSE3_64_WIN-NEXT: subq $16, %rsp
1150+
; SSE3_64_WIN-NEXT: pushq %rax
11511151
; SSE3_64_WIN-NEXT: fldt (%rcx)
11521152
; SSE3_64_WIN-NEXT: flds __real@{{.*}}(%rip)
11531153
; SSE3_64_WIN-NEXT: fld %st(1)
11541154
; SSE3_64_WIN-NEXT: fsub %st(1)
1155-
; SSE3_64_WIN-NEXT: fisttpll {{[0-9]+}}(%rsp)
1156-
; SSE3_64_WIN-NEXT: fld %st(1)
1155+
; SSE3_64_WIN-NEXT: xorl %eax, %eax
1156+
; SSE3_64_WIN-NEXT: fxch %st(1)
1157+
; SSE3_64_WIN-NEXT: fucompi %st(2)
1158+
; SSE3_64_WIN-NEXT: fcmovnbe %st(1), %st(0)
1159+
; SSE3_64_WIN-NEXT: fstp %st(1)
11571160
; SSE3_64_WIN-NEXT: fisttpll (%rsp)
1158-
; SSE3_64_WIN-NEXT: fucompi %st(1)
1159-
; SSE3_64_WIN-NEXT: fstp %st(0)
1160-
; SSE3_64_WIN-NEXT: jbe .LBB4_1
1161-
; SSE3_64_WIN-NEXT: # %bb.2:
1162-
; SSE3_64_WIN-NEXT: movq (%rsp), %rax
1163-
; SSE3_64_WIN-NEXT: addq $16, %rsp
1164-
; SSE3_64_WIN-NEXT: retq
1165-
; SSE3_64_WIN-NEXT: .LBB4_1:
1166-
; SSE3_64_WIN-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
1167-
; SSE3_64_WIN-NEXT: xorq {{[0-9]+}}(%rsp), %rax
1168-
; SSE3_64_WIN-NEXT: addq $16, %rsp
1161+
; SSE3_64_WIN-NEXT: setbe %al
1162+
; SSE3_64_WIN-NEXT: shlq $63, %rax
1163+
; SSE3_64_WIN-NEXT: xorq (%rsp), %rax
1164+
; SSE3_64_WIN-NEXT: popq %rcx
11691165
; SSE3_64_WIN-NEXT: retq
11701166
;
11711167
; SSE3_64_LIN-LABEL: x_to_u64:
@@ -1174,17 +1170,14 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
11741170
; SSE3_64_LIN-NEXT: flds {{.*}}(%rip)
11751171
; SSE3_64_LIN-NEXT: fld %st(1)
11761172
; SSE3_64_LIN-NEXT: fsub %st(1)
1173+
; SSE3_64_LIN-NEXT: xorl %eax, %eax
1174+
; SSE3_64_LIN-NEXT: fxch %st(1)
1175+
; SSE3_64_LIN-NEXT: fucompi %st(2)
1176+
; SSE3_64_LIN-NEXT: fcmovnbe %st(1), %st(0)
1177+
; SSE3_64_LIN-NEXT: fstp %st(1)
11771178
; SSE3_64_LIN-NEXT: fisttpll -{{[0-9]+}}(%rsp)
1178-
; SSE3_64_LIN-NEXT: fld %st(1)
1179-
; SSE3_64_LIN-NEXT: fisttpll -{{[0-9]+}}(%rsp)
1180-
; SSE3_64_LIN-NEXT: fucompi %st(1)
1181-
; SSE3_64_LIN-NEXT: fstp %st(0)
1182-
; SSE3_64_LIN-NEXT: jbe .LBB4_1
1183-
; SSE3_64_LIN-NEXT: # %bb.2:
1184-
; SSE3_64_LIN-NEXT: movq -{{[0-9]+}}(%rsp), %rax
1185-
; SSE3_64_LIN-NEXT: retq
1186-
; SSE3_64_LIN-NEXT: .LBB4_1:
1187-
; SSE3_64_LIN-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
1179+
; SSE3_64_LIN-NEXT: setbe %al
1180+
; SSE3_64_LIN-NEXT: shlq $63, %rax
11881181
; SSE3_64_LIN-NEXT: xorq -{{[0-9]+}}(%rsp), %rax
11891182
; SSE3_64_LIN-NEXT: retq
11901183
;
@@ -1246,37 +1239,27 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
12461239
;
12471240
; SSE2_64_WIN-LABEL: x_to_u64:
12481241
; SSE2_64_WIN: # %bb.0:
1249-
; SSE2_64_WIN-NEXT: subq $24, %rsp
1242+
; SSE2_64_WIN-NEXT: subq $16, %rsp
12501243
; SSE2_64_WIN-NEXT: fldt (%rcx)
12511244
; SSE2_64_WIN-NEXT: flds __real@{{.*}}(%rip)
12521245
; SSE2_64_WIN-NEXT: fld %st(1)
12531246
; SSE2_64_WIN-NEXT: fsub %st(1)
1247+
; SSE2_64_WIN-NEXT: xorl %eax, %eax
1248+
; SSE2_64_WIN-NEXT: fxch %st(1)
1249+
; SSE2_64_WIN-NEXT: fucompi %st(2)
1250+
; SSE2_64_WIN-NEXT: fcmovnbe %st(1), %st(0)
1251+
; SSE2_64_WIN-NEXT: fstp %st(1)
12541252
; SSE2_64_WIN-NEXT: fnstcw {{[0-9]+}}(%rsp)
1255-
; SSE2_64_WIN-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
1253+
; SSE2_64_WIN-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx
12561254
; SSE2_64_WIN-NEXT: movw $3199, {{[0-9]+}}(%rsp) # imm = 0xC7F
12571255
; SSE2_64_WIN-NEXT: fldcw {{[0-9]+}}(%rsp)
1258-
; SSE2_64_WIN-NEXT: movw %ax, {{[0-9]+}}(%rsp)
1256+
; SSE2_64_WIN-NEXT: movw %cx, {{[0-9]+}}(%rsp)
12591257
; SSE2_64_WIN-NEXT: fistpll {{[0-9]+}}(%rsp)
12601258
; SSE2_64_WIN-NEXT: fldcw {{[0-9]+}}(%rsp)
1261-
; SSE2_64_WIN-NEXT: fnstcw {{[0-9]+}}(%rsp)
1262-
; SSE2_64_WIN-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
1263-
; SSE2_64_WIN-NEXT: movw $3199, {{[0-9]+}}(%rsp) # imm = 0xC7F
1264-
; SSE2_64_WIN-NEXT: fldcw {{[0-9]+}}(%rsp)
1265-
; SSE2_64_WIN-NEXT: movw %ax, {{[0-9]+}}(%rsp)
1266-
; SSE2_64_WIN-NEXT: fld %st(1)
1267-
; SSE2_64_WIN-NEXT: fistpll {{[0-9]+}}(%rsp)
1268-
; SSE2_64_WIN-NEXT: fldcw {{[0-9]+}}(%rsp)
1269-
; SSE2_64_WIN-NEXT: fucompi %st(1)
1270-
; SSE2_64_WIN-NEXT: fstp %st(0)
1271-
; SSE2_64_WIN-NEXT: jbe .LBB4_1
1272-
; SSE2_64_WIN-NEXT: # %bb.2:
1273-
; SSE2_64_WIN-NEXT: movq {{[0-9]+}}(%rsp), %rax
1274-
; SSE2_64_WIN-NEXT: addq $24, %rsp
1275-
; SSE2_64_WIN-NEXT: retq
1276-
; SSE2_64_WIN-NEXT: .LBB4_1:
1277-
; SSE2_64_WIN-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
1259+
; SSE2_64_WIN-NEXT: setbe %al
1260+
; SSE2_64_WIN-NEXT: shlq $63, %rax
12781261
; SSE2_64_WIN-NEXT: xorq {{[0-9]+}}(%rsp), %rax
1279-
; SSE2_64_WIN-NEXT: addq $24, %rsp
1262+
; SSE2_64_WIN-NEXT: addq $16, %rsp
12801263
; SSE2_64_WIN-NEXT: retq
12811264
;
12821265
; SSE2_64_LIN-LABEL: x_to_u64:
@@ -1285,29 +1268,20 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
12851268
; SSE2_64_LIN-NEXT: flds {{.*}}(%rip)
12861269
; SSE2_64_LIN-NEXT: fld %st(1)
12871270
; SSE2_64_LIN-NEXT: fsub %st(1)
1271+
; SSE2_64_LIN-NEXT: xorl %eax, %eax
1272+
; SSE2_64_LIN-NEXT: fxch %st(1)
1273+
; SSE2_64_LIN-NEXT: fucompi %st(2)
1274+
; SSE2_64_LIN-NEXT: fcmovnbe %st(1), %st(0)
1275+
; SSE2_64_LIN-NEXT: fstp %st(1)
12881276
; SSE2_64_LIN-NEXT: fnstcw -{{[0-9]+}}(%rsp)
1289-
; SSE2_64_LIN-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
1277+
; SSE2_64_LIN-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
12901278
; SSE2_64_LIN-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F
12911279
; SSE2_64_LIN-NEXT: fldcw -{{[0-9]+}}(%rsp)
1292-
; SSE2_64_LIN-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
1280+
; SSE2_64_LIN-NEXT: movw %cx, -{{[0-9]+}}(%rsp)
12931281
; SSE2_64_LIN-NEXT: fistpll -{{[0-9]+}}(%rsp)
12941282
; SSE2_64_LIN-NEXT: fldcw -{{[0-9]+}}(%rsp)
1295-
; SSE2_64_LIN-NEXT: fnstcw -{{[0-9]+}}(%rsp)
1296-
; SSE2_64_LIN-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
1297-
; SSE2_64_LIN-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F
1298-
; SSE2_64_LIN-NEXT: fldcw -{{[0-9]+}}(%rsp)
1299-
; SSE2_64_LIN-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
1300-
; SSE2_64_LIN-NEXT: fld %st(1)
1301-
; SSE2_64_LIN-NEXT: fistpll -{{[0-9]+}}(%rsp)
1302-
; SSE2_64_LIN-NEXT: fldcw -{{[0-9]+}}(%rsp)
1303-
; SSE2_64_LIN-NEXT: fucompi %st(1)
1304-
; SSE2_64_LIN-NEXT: fstp %st(0)
1305-
; SSE2_64_LIN-NEXT: jbe .LBB4_1
1306-
; SSE2_64_LIN-NEXT: # %bb.2:
1307-
; SSE2_64_LIN-NEXT: movq -{{[0-9]+}}(%rsp), %rax
1308-
; SSE2_64_LIN-NEXT: retq
1309-
; SSE2_64_LIN-NEXT: .LBB4_1:
1310-
; SSE2_64_LIN-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
1283+
; SSE2_64_LIN-NEXT: setbe %al
1284+
; SSE2_64_LIN-NEXT: shlq $63, %rax
13111285
; SSE2_64_LIN-NEXT: xorq -{{[0-9]+}}(%rsp), %rax
13121286
; SSE2_64_LIN-NEXT: retq
13131287
;

0 commit comments

Comments
 (0)
Please sign in to comment.