diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -25427,8 +25427,11 @@ 2 Round to +inf 3 Round to -inf - To perform the conversion, we do: - (((((FPSR & 0x800) >> 11) | ((FPSR & 0x400) >> 9)) + 1) & 3) + To perform the conversion, we use a packed lookup table of the four 2-bit + values that we can index by FPSR[11:10] + 0x2d --> (0b00,10,11,01) --> (0,2,3,1) >> FPSR[11:10] + + (0x2d >> ((FPSR & 0xc00) >> 9)) & 3 */ MachineFunction &MF = DAG.getMachineFunction(); @@ -25456,24 +25459,19 @@ SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot, MachinePointerInfo()); - // Transform as necessary - SDValue CWD1 = - DAG.getNode(ISD::SRL, DL, MVT::i16, - DAG.getNode(ISD::AND, DL, MVT::i16, - CWD, DAG.getConstant(0x800, DL, MVT::i16)), - DAG.getConstant(11, DL, MVT::i8)); - SDValue CWD2 = + // Mask and turn the control bits into a shift for the lookup table. 
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i16, DAG.getNode(ISD::AND, DL, MVT::i16, - CWD, DAG.getConstant(0x400, DL, MVT::i16)), + CWD, DAG.getConstant(0xc00, DL, MVT::i16)), DAG.getConstant(9, DL, MVT::i8)); + Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, Shift); + SDValue LUT = DAG.getConstant(0x2d, DL, MVT::i32); SDValue RetVal = - DAG.getNode(ISD::AND, DL, MVT::i16, - DAG.getNode(ISD::ADD, DL, MVT::i16, - DAG.getNode(ISD::OR, DL, MVT::i16, CWD1, CWD2), - DAG.getConstant(1, DL, MVT::i16)), - DAG.getConstant(3, DL, MVT::i16)); + DAG.getNode(ISD::AND, DL, MVT::i32, + DAG.getNode(ISD::SRL, DL, MVT::i32, LUT, Shift), + DAG.getConstant(3, DL, MVT::i32)); return DAG.getZExtOrTrunc(RetVal, DL, VT); } diff --git a/llvm/test/CodeGen/X86/flt-rounds.ll b/llvm/test/CodeGen/X86/flt-rounds.ll --- a/llvm/test/CodeGen/X86/flt-rounds.ll +++ b/llvm/test/CodeGen/X86/flt-rounds.ll @@ -10,13 +10,12 @@ ; X86: # %bb.0: ; X86-NEXT: subl $12, %esp ; X86-NEXT: fnstcw (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl %eax, %ecx +; X86-NEXT: movzwl (%esp), %ecx ; X86-NEXT: shrl $9, %ecx -; X86-NEXT: andl $2, %ecx -; X86-NEXT: shrl $11, %eax -; X86-NEXT: andl $1, %eax -; X86-NEXT: leal 1(%eax,%ecx), %eax +; X86-NEXT: andb $6, %cl +; X86-NEXT: movl $45, %eax +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: shrl %cl, %eax ; X86-NEXT: andl $3, %eax ; X86-NEXT: addl $12, %esp ; X86-NEXT: retl @@ -24,13 +23,12 @@ ; X64-LABEL: test_flt_rounds: ; X64: # %bb.0: ; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp) -; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax -; X64-NEXT: movl %eax, %ecx +; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx ; X64-NEXT: shrl $9, %ecx -; X64-NEXT: andl $2, %ecx -; X64-NEXT: shrl $11, %eax -; X64-NEXT: andl $1, %eax -; X64-NEXT: leal 1(%rax,%rcx), %eax +; X64-NEXT: andb $6, %cl +; X64-NEXT: movl $45, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shrl %cl, %eax ; X64-NEXT: andl $3, %eax ; X64-NEXT: retq %1 = call i32 @llvm.flt.rounds()