Index: llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -173,8 +173,8 @@
   SDValue ExpandFCOPYSIGN(SDNode *Node) const;
   SDValue ExpandFABS(SDNode *Node) const;
   SDValue ExpandLegalINT_TO_FP(SDNode *Node, SDValue &Chain);
-  SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned,
-                                const SDLoc &dl);
+  void PromoteLegalINT_TO_FP(SDNode *N, const SDLoc &dl,
+                             SmallVectorImpl<SDValue> &Results);
   void PromoteLegalFP_TO_INT(SDNode *N, const SDLoc &dl,
                              SmallVectorImpl<SDValue> &Results);
@@ -2492,9 +2492,13 @@
 /// we promote it. At this point, we know that the result and operand types are
 /// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP
 /// operation that takes a larger input.
-SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT,
-                                                    bool isSigned,
-                                                    const SDLoc &dl) {
+void SelectionDAGLegalize::PromoteLegalINT_TO_FP(
+    SDNode *N, const SDLoc &dl, SmallVectorImpl<SDValue> &Results) {
+  bool IsStrict = N->isStrictFPOpcode();
+  bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP ||
+                  N->getOpcode() == ISD::STRICT_SINT_TO_FP;
+  EVT DestVT = N->getValueType(0);
+  SDValue LegalOp = N->getOperand(IsStrict ? 1 : 0);
   // First step, figure out the appropriate *INT_TO_FP operation to use.
   EVT NewInTy = LegalOp.getValueType();
@@ -2506,26 +2510,34 @@
     assert(NewInTy.isInteger() && "Ran out of possibilities!");
 
     // If the target supports SINT_TO_FP of this type, use it.
-    if (TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, NewInTy)) {
-      OpToUse = ISD::SINT_TO_FP;
+    OpToUse = IsStrict ? ISD::STRICT_SINT_TO_FP : ISD::SINT_TO_FP;
+    if (TLI.isOperationLegalOrCustom(OpToUse, NewInTy))
       break;
-    }
-    if (isSigned) continue;
+    if (IsSigned) continue;
 
     // If the target supports UINT_TO_FP of this type, use it.
-    if (TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, NewInTy)) {
-      OpToUse = ISD::UINT_TO_FP;
+    OpToUse = IsStrict ? ISD::STRICT_UINT_TO_FP : ISD::UINT_TO_FP;
+    if (TLI.isOperationLegalOrCustom(OpToUse, NewInTy))
       break;
-    }
 
     // Otherwise, try a larger type.
   }
 
-  // Okay, we found the operation and type to use. Zero extend our input to the
+  // Okay, we found the operation and type to use. Extend our input to the
   // desired type then run the operation on it.
-  return DAG.getNode(OpToUse, dl, DestVT,
-                     DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
-                                 dl, NewInTy, LegalOp));
+  SDValue Ext = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+                            NewInTy, LegalOp);
+
+  SDValue Operation;
+  if (IsStrict)
+    Operation =
+        DAG.getNode(OpToUse, dl, {DestVT, MVT::Other}, {N->getOperand(0), Ext});
+  else
+    Operation = DAG.getNode(OpToUse, dl, DestVT, Ext);
+
+  Results.push_back(Operation);
+  if (IsStrict)
+    Results.push_back(Operation.getValue(1));
 }
 
 /// This function is responsible for legalizing a
@@ -4221,10 +4233,12 @@
       Node->getOpcode() == ISD::SINT_TO_FP ||
       Node->getOpcode() == ISD::SETCC ||
       Node->getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
-      Node->getOpcode() == ISD::INSERT_VECTOR_ELT) {
+      Node->getOpcode() == ISD::INSERT_VECTOR_ELT)
     OVT = Node->getOperand(0).getSimpleValueType();
-  }
-  if (Node->getOpcode() == ISD::BR_CC)
+  else if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP ||
+           Node->getOpcode() == ISD::STRICT_SINT_TO_FP)
+    OVT = Node->getOperand(1).getSimpleValueType();
+  else if (Node->getOpcode() == ISD::BR_CC)
    OVT = Node->getOperand(2).getSimpleValueType();
   MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
   SDLoc dl(Node);
@@ -4278,10 +4292,10 @@
     PromoteLegalFP_TO_INT(Node, dl, Results);
     break;
   case ISD::UINT_TO_FP:
+  case ISD::STRICT_UINT_TO_FP:
   case ISD::SINT_TO_FP:
-    Tmp1 = PromoteLegalINT_TO_FP(Node->getOperand(0), Node->getValueType(0),
-                                 Node->getOpcode() == ISD::SINT_TO_FP, dl);
-    Results.push_back(Tmp1);
+  case ISD::STRICT_SINT_TO_FP:
+    PromoteLegalINT_TO_FP(Node, dl, Results);
     break;
   case ISD::VAARG: {
     SDValue Chain = Node->getOperand(0); // Get the chain.
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -228,8 +228,10 @@
   if (!Subtarget.useSoftFloat()) {
     // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
    // operation.
-    setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
-    setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
+    setOperationAction(ISD::UINT_TO_FP,        MVT::i8, Promote);
+    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i8, Promote);
+    setOperationAction(ISD::UINT_TO_FP,        MVT::i16, Promote);
+    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i16, Promote);
     // We have an algorithm for SSE2, and we turn this into a 64-bit
     // FILD or VCVTUSI2SS/SD for other targets.
     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
@@ -241,16 +243,18 @@
     // Promote i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
     // this operation.
-    setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
+    setOperationAction(ISD::SINT_TO_FP,        MVT::i8, Promote);
+    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i8, Promote);
     // SSE has no i16 to fp conversion, only i32. We promote in the handler
     // to allow f80 to use i16 and f64 to use i16 with sse1 only
-    setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
+    setOperationAction(ISD::SINT_TO_FP,        MVT::i16, Custom);
+    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i16, Custom);
     // f32 and f64 cases are Legal with SSE1/SSE2, f80 case is not
-    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+    setOperationAction(ISD::SINT_TO_FP,        MVT::i32, Custom);
     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
     // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
     // are Legal, f80 is custom lowered.
-    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+    setOperationAction(ISD::SINT_TO_FP,        MVT::i64, Custom);
     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
 
     // Promote i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
@@ -18423,7 +18427,8 @@
           Op.getOpcode() == ISD::STRICT_UINT_TO_FP ||
           Op.getOpcode() == ISD::UINT_TO_FP) &&
          "Unexpected opcode!");
-  unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0;
+  bool IsStrict = Op->isStrictFPOpcode();
+  unsigned OpNo = IsStrict ? 1 : 0;
   SDValue Src = Op.getOperand(OpNo);
   MVT SrcVT = Src.getSimpleValueType();
   MVT VT = Op.getSimpleValueType();
@@ -18441,7 +18446,7 @@
     SDLoc dl(Op);
     SDValue InVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecInVT, Src);
-    if (Op.getNode()->isStrictFPOpcode()) {
+    if (IsStrict) {
       SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, {VecVT, MVT::Other},
                                    {Op.getOperand(0), InVec});
       SDValue Chain = CvtVec.getValue(1);
@@ -18522,7 +18527,8 @@
 
 SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
                                            SelectionDAG &DAG) const {
-  unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0;
+  bool IsStrict = Op->isStrictFPOpcode();
+  unsigned OpNo = IsStrict ? 1 : 0;
   SDValue Src = Op.getOperand(OpNo);
   MVT SrcVT = Src.getSimpleValueType();
   MVT VT = Op.getSimpleValueType();
@@ -18532,8 +18538,7 @@
     return Extract;
 
   if (SrcVT.isVector()) {
-    if (SrcVT == MVT::v2i32 && VT == MVT::v2f64 &&
-        !Op.getNode()->isStrictFPOpcode()) {
+    if (SrcVT == MVT::v2i32 && VT == MVT::v2f64 && !IsStrict) {
       // FIXME: A strict version of CVTSI2P is needed.
       return DAG.getNode(X86ISD::CVTSI2P, dl, VT,
                          DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
@@ -18560,13 +18565,17 @@
   // SSE doesn't have an i16 conversion so we need to promote.
   if (SrcVT == MVT::i16 && (UseSSEReg || VT == MVT::f128)) {
     SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Src);
+    if (IsStrict)
+      return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},
+                         {Op.getOperand(0), Ext});
+
     return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Ext);
   }
 
   if (VT == MVT::f128)
     return LowerF128Call(Op, DAG, RTLIB::getSINTTOFP(SrcVT, VT));
 
-  SDValue ValueToStore = Op.getOperand(OpNo);
+  SDValue ValueToStore = Src;
   if (SrcVT == MVT::i64 && UseSSEReg && !Subtarget.is64Bit())
     // Bitcasting to f64 here allows us to do a single 64-bit store from
     // an SSE register, avoiding the store forwarding penalty that would come
@@ -18578,10 +18587,15 @@
   auto PtrVT = getPointerTy(MF.getDataLayout());
   int SSFI = MF.getFrameInfo().CreateStackObject(Size, Size, false);
   SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
-  SDValue Chain = DAG.getStore(
-      DAG.getEntryNode(), dl, ValueToStore, StackSlot,
+  SDValue Chain = IsStrict ? Op->getOperand(0) : DAG.getEntryNode();
+  Chain = DAG.getStore(
+      Chain, dl, ValueToStore, StackSlot,
       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI));
-  return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG).first;
+  std::pair<SDValue, SDValue> Tmp = BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
+  if (IsStrict)
+    return DAG.getMergeValues({Tmp.first, Tmp.second}, dl);
+
+  return Tmp.first;
 }
 
 std::pair<SDValue, SDValue> X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
@@ -18669,7 +18683,8 @@
 #endif
   */
 
-  unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0;
+  bool IsStrict = Op->isStrictFPOpcode();
+  unsigned OpNo = IsStrict ? 1 : 0;
   SDLoc dl(Op);
   LLVMContext *Context = DAG.getContext();
@@ -18707,7 +18722,7 @@
   SDValue Sub;
   SDValue Chain;
   // TODO: Are there any fast-math-flags to propagate here?
-  if (Op.getNode()->isStrictFPOpcode()) {
+  if (IsStrict) {
     Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::v2f64, MVT::Other},
                       {Op.getOperand(0), XR2F, CLod1});
     Chain = Sub.getValue(1);
@@ -18715,26 +18730,25 @@
     Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
 
   SDValue Result;
-  if (Subtarget.hasSSE3() && shouldUseHorizontalOp(true, DAG, Subtarget)) {
+  if (!IsStrict && Subtarget.hasSSE3() &&
+      shouldUseHorizontalOp(true, DAG, Subtarget)) {
     // FIXME: Do we need a STRICT version of FHADD?
     Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub);
   } else {
     SDValue Shuffle = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, Sub, {1,-1});
-    if (Op.getNode()->isStrictFPOpcode()) {
+    if (IsStrict) {
       Result = DAG.getNode(ISD::STRICT_FADD, dl, {MVT::v2f64, MVT::Other},
                            {Chain, Shuffle, Sub});
       Chain = Result.getValue(1);
     } else
       Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuffle, Sub);
   }
-  if (Op.getNode()->isStrictFPOpcode()) {
-    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
-                         DAG.getIntPtrConstant(0, dl));
+  Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
+                       DAG.getIntPtrConstant(0, dl));
+  if (IsStrict)
     return DAG.getMergeValues({Result, Chain}, dl);
-  }
-  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
-                     DAG.getIntPtrConstant(0, dl));
+
+  return Result;
 }
 
 /// 32-bit unsigned integer to float expansion.
@@ -18775,14 +18789,14 @@
     SDValue Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::f64, MVT::Other},
                               {Chain, Or, Bias});
 
-    if (!Op.getValueType().bitsEq(Sub.getValueType())) {
-      // Handle final rounding.
-      std::pair<SDValue, SDValue> ResultPair = DAG.getStrictFPExtendOrRound(
-          Sub, Sub.getValue(1), dl, Op.getSimpleValueType());
+    if (Op.getValueType() == Sub.getValueType())
+      return Sub;
 
-      return DAG.getMergeValues({ResultPair.first, ResultPair.second}, dl);
-    }
-    return Sub;
+    // Handle final rounding.
+    std::pair<SDValue, SDValue> ResultPair = DAG.getStrictFPExtendOrRound(
+        Sub, Sub.getValue(1), dl, Op.getSimpleValueType());
+
+    return DAG.getMergeValues({ResultPair.first, ResultPair.second}, dl);
   }
 
   // Subtract the bias.
@@ -18948,14 +18962,14 @@
 
 SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
                                            SelectionDAG &DAG) const {
-  unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0;
-  SDValue N0 = Op.getOperand(OpNo);
+  bool IsStrict = Op->isStrictFPOpcode();
+  unsigned OpNo = IsStrict ? 1 : 0;
+  SDValue Src = Op.getOperand(OpNo);
   SDLoc dl(Op);
   auto PtrVT = getPointerTy(DAG.getDataLayout());
-  MVT SrcVT = N0.getSimpleValueType();
+  MVT SrcVT = Src.getSimpleValueType();
   MVT DstVT = Op.getSimpleValueType();
-  SDValue Chain =
-      Op.getNode()->isStrictFPOpcode() ? Op.getOperand(0) : DAG.getEntryNode();
+  SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
 
   if (DstVT == MVT::f128)
     return LowerF128Call(Op, DAG, RTLIB::getUINTTOFP(SrcVT, DstVT));
@@ -18975,12 +18989,12 @@
 
   // Promote i32 to i64 and use a signed conversion on 64-bit targets.
   if (SrcVT == MVT::i32 && Subtarget.is64Bit()) {
-    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, N0);
-    if (Op.getNode()->isStrictFPOpcode()) {
+    Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Src);
+    if (IsStrict)
       return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {DstVT, MVT::Other},
-                         {Chain, N0});
-    }
-    return DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, N0);
+                         {Chain, Src});
+
+    return DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);
   }
 
   if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget))
@@ -18997,20 +19011,26 @@
   SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64);
   if (SrcVT == MVT::i32) {
     SDValue OffsetSlot = DAG.getMemBasePlusOffset(StackSlot, 4, dl);
-    SDValue Store1 = DAG.getStore(Chain, dl, Op.getOperand(OpNo), StackSlot,
-                                  MachinePointerInfo());
+    SDValue Store1 =
+        DAG.getStore(Chain, dl, Src, StackSlot, MachinePointerInfo());
     SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, dl, MVT::i32),
                                   OffsetSlot, MachinePointerInfo());
-    return BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG).first;
+    std::pair<SDValue, SDValue> Tmp =
+        BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
+    if (IsStrict)
+      return DAG.getMergeValues({Tmp.first, Tmp.second}, dl);
+
+    return Tmp.first;
   }
 
   assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
-  SDValue ValueToStore = Op.getOperand(OpNo);
-  if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget.is64Bit())
+  SDValue ValueToStore = Src;
+  if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget.is64Bit()) {
     // Bitcasting to f64 here allows us to do a single 64-bit store from
     // an SSE register, avoiding the store forwarding penalty that would come
     // with two 32-bit stores.
     ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore);
+  }
   SDValue Store =
       DAG.getStore(Chain, dl, ValueToStore, StackSlot, MachinePointerInfo());
   // For i64 source, we need to add the appropriate power of 2 if the input
@@ -19055,7 +19075,7 @@
     Chain = Fudge.getValue(1);
   // Extend everything to 80 bits to force it to be done on x87.
   // TODO: Are there any fast-math-flags to propagate here?
-  if (Op.getNode()->isStrictFPOpcode()) {
+  if (IsStrict) {
     SDValue Add = DAG.getNode(ISD::STRICT_FADD, dl, {MVT::f80, MVT::Other},
                               {Chain, Fild, Fudge});
     return DAG.getNode(ISD::STRICT_FP_ROUND, dl, {DstVT, MVT::Other},
@@ -19114,10 +19134,7 @@
   int SSFI = MF.getFrameInfo().CreateStackObject(MemSize, MemSize, false);
   SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
 
-  if (IsStrict)
-    Chain = Op.getOperand(0);
-  else
-    Chain = DAG.getEntryNode();
+  Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
 
   SDValue Adjust; // 0x0 or 0x80000000, for result sign bit adjustment.
Index: llvm/test/CodeGen/X86/fp-intrinsics.ll
===================================================================
--- llvm/test/CodeGen/X86/fp-intrinsics.ll
+++ llvm/test/CodeGen/X86/fp-intrinsics.ll
@@ -1942,8 +1942,92 @@
 }
 
 ; Verify that sitofp(%x) isn't simplified when the rounding mode is
-; unknown. The expansion should have only one conversion instruction.
+; unknown.
 ; Verify that no gross errors happen.
+define double @sifdb(i8 %x) #0 {
+; X87-LABEL: sifdb:
+; X87:       # %bb.0: # %entry
+; X87-NEXT:    pushl %eax
+; X87-NEXT:    .cfi_def_cfa_offset 8
+; X87-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
+; X87-NEXT:    movw %ax, {{[0-9]+}}(%esp)
+; X87-NEXT:    filds {{[0-9]+}}(%esp)
+; X87-NEXT:    popl %eax
+; X87-NEXT:    .cfi_def_cfa_offset 4
+; X87-NEXT:    retl
+;
+; X86-SSE-LABEL: sifdb:
+; X86-SSE:       # %bb.0: # %entry
+; X86-SSE-NEXT:    subl $12, %esp
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 16
+; X86-SSE-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    cvtsi2sd %eax, %xmm0
+; X86-SSE-NEXT:    movsd %xmm0, (%esp)
+; X86-SSE-NEXT:    fldl (%esp)
+; X86-SSE-NEXT:    addl $12, %esp
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
+; X86-SSE-NEXT:    retl
+;
+; SSE-LABEL: sifdb:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    movsbl %dil, %eax
+; SSE-NEXT:    cvtsi2sd %eax, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: sifdb:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    movsbl %dil, %eax
+; AVX-NEXT:    vcvtsi2sd %eax, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %result = call double @llvm.experimental.constrained.sitofp.f64.i8(i8 %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
+define double @sifdw(i16 %x) #0 {
+; X87-LABEL: sifdw:
+; X87:       # %bb.0: # %entry
+; X87-NEXT:    pushl %eax
+; X87-NEXT:    .cfi_def_cfa_offset 8
+; X87-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X87-NEXT:    movw %ax, {{[0-9]+}}(%esp)
+; X87-NEXT:    filds {{[0-9]+}}(%esp)
+; X87-NEXT:    popl %eax
+; X87-NEXT:    .cfi_def_cfa_offset 4
+; X87-NEXT:    retl
+;
+; X86-SSE-LABEL: sifdw:
+; X86-SSE:       # %bb.0: # %entry
+; X86-SSE-NEXT:    subl $12, %esp
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 16
+; X86-SSE-NEXT:    movswl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    cvtsi2sd %eax, %xmm0
+; X86-SSE-NEXT:    movsd %xmm0, (%esp)
+; X86-SSE-NEXT:    fldl (%esp)
+; X86-SSE-NEXT:    addl $12, %esp
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
+; X86-SSE-NEXT:    retl
+;
+; SSE-LABEL: sifdw:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    movswl %di, %eax
+; SSE-NEXT:    cvtsi2sd %eax, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: sifdw:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    movswl %di, %eax
+; AVX-NEXT:    vcvtsi2sd %eax, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %result = call double @llvm.experimental.constrained.sitofp.f64.i16(i16 %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
 define double @sifdi(i32 %x) #0 {
 ; X87-LABEL: sifdi:
 ; X87:       # %bb.0: # %entry
@@ -1983,6 +2067,90 @@
   ret double %result
 }
 
+define float @siffb(i8 %x) #0 {
+; X87-LABEL: siffb:
+; X87:       # %bb.0: # %entry
+; X87-NEXT:    pushl %eax
+; X87-NEXT:    .cfi_def_cfa_offset 8
+; X87-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
+; X87-NEXT:    movw %ax, {{[0-9]+}}(%esp)
+; X87-NEXT:    filds {{[0-9]+}}(%esp)
+; X87-NEXT:    popl %eax
+; X87-NEXT:    .cfi_def_cfa_offset 4
+; X87-NEXT:    retl
+;
+; X86-SSE-LABEL: siffb:
+; X86-SSE:       # %bb.0: # %entry
+; X86-SSE-NEXT:    pushl %eax
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 8
+; X86-SSE-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    cvtsi2ss %eax, %xmm0
+; X86-SSE-NEXT:    movss %xmm0, (%esp)
+; X86-SSE-NEXT:    flds (%esp)
+; X86-SSE-NEXT:    popl %eax
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
+; X86-SSE-NEXT:    retl
+;
+; SSE-LABEL: siffb:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    movsbl %dil, %eax
+; SSE-NEXT:    cvtsi2ss %eax, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: siffb:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    movsbl %dil, %eax
+; AVX-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %result = call float @llvm.experimental.constrained.sitofp.f32.i8(i8 %x,
+                                              metadata !"round.dynamic",
+                                              metadata !"fpexcept.strict") #0
+  ret float %result
+}
+
+define float @siffw(i16 %x) #0 {
+; X87-LABEL: siffw:
+; X87:       # %bb.0: # %entry
+; X87-NEXT:    pushl %eax
+; X87-NEXT:    .cfi_def_cfa_offset 8
+; X87-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X87-NEXT:    movw %ax, {{[0-9]+}}(%esp)
+; X87-NEXT:    filds {{[0-9]+}}(%esp)
+; X87-NEXT:    popl %eax
+; X87-NEXT:    .cfi_def_cfa_offset 4
+; X87-NEXT:    retl
+;
+; X86-SSE-LABEL: siffw:
+; X86-SSE:       # %bb.0: # %entry
+; X86-SSE-NEXT:    pushl %eax
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 8
+; X86-SSE-NEXT:    movswl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    cvtsi2ss %eax, %xmm0
+; X86-SSE-NEXT:    movss %xmm0, (%esp)
+; X86-SSE-NEXT:    flds (%esp)
+; X86-SSE-NEXT:    popl %eax
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
+; X86-SSE-NEXT:    retl
+;
+; SSE-LABEL: siffw:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    movswl %di, %eax
+; SSE-NEXT:    cvtsi2ss %eax, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: siffw:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    movswl %di, %eax
+; AVX-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %result = call float @llvm.experimental.constrained.sitofp.f32.i16(i16 %x,
+                                              metadata !"round.dynamic",
+                                              metadata !"fpexcept.strict") #0
+  ret float %result
+}
+
 define float @siffi(i32 %x) #0 {
 ; X87-LABEL: siffi:
 ; X87:       # %bb.0: # %entry
@@ -2109,8 +2277,92 @@
 }
 
 ; Verify that uitofp(%x) isn't simplified when the rounding mode is
-; unknown. Expansions from i32 should have only one conversion instruction.
+; unknown.
 ; Verify that no gross errors happen.
+define double @uifdb(i8 %x) #0 {
+; X87-LABEL: uifdb:
+; X87:       # %bb.0: # %entry
+; X87-NEXT:    pushl %eax
+; X87-NEXT:    .cfi_def_cfa_offset 8
+; X87-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X87-NEXT:    movw %ax, {{[0-9]+}}(%esp)
+; X87-NEXT:    filds {{[0-9]+}}(%esp)
+; X87-NEXT:    popl %eax
+; X87-NEXT:    .cfi_def_cfa_offset 4
+; X87-NEXT:    retl
+;
+; X86-SSE-LABEL: uifdb:
+; X86-SSE:       # %bb.0: # %entry
+; X86-SSE-NEXT:    subl $12, %esp
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 16
+; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    cvtsi2sd %eax, %xmm0
+; X86-SSE-NEXT:    movsd %xmm0, (%esp)
+; X86-SSE-NEXT:    fldl (%esp)
+; X86-SSE-NEXT:    addl $12, %esp
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
+; X86-SSE-NEXT:    retl
+;
+; SSE-LABEL: uifdb:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    movzbl %dil, %eax
+; SSE-NEXT:    cvtsi2sd %eax, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: uifdb:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    movzbl %dil, %eax
+; AVX-NEXT:    vcvtsi2sd %eax, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %result = call double @llvm.experimental.constrained.uitofp.f64.i8(i8 %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
+define double @uifdw(i16 %x) #0 {
+; X87-LABEL: uifdw:
+; X87:       # %bb.0: # %entry
+; X87-NEXT:    pushl %eax
+; X87-NEXT:    .cfi_def_cfa_offset 8
+; X87-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X87-NEXT:    movl %eax, (%esp)
+; X87-NEXT:    fildl (%esp)
+; X87-NEXT:    popl %eax
+; X87-NEXT:    .cfi_def_cfa_offset 4
+; X87-NEXT:    retl
+;
+; X86-SSE-LABEL: uifdw:
+; X86-SSE:       # %bb.0: # %entry
+; X86-SSE-NEXT:    subl $12, %esp
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 16
+; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    cvtsi2sd %eax, %xmm0
+; X86-SSE-NEXT:    movsd %xmm0, (%esp)
+; X86-SSE-NEXT:    fldl (%esp)
+; X86-SSE-NEXT:    addl $12, %esp
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
+; X86-SSE-NEXT:    retl
+;
+; SSE-LABEL: uifdw:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    movzwl %di, %eax
+; SSE-NEXT:    cvtsi2sd %eax, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: uifdw:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    movzwl %di, %eax
+; AVX-NEXT:    vcvtsi2sd %eax, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %result = call double @llvm.experimental.constrained.uitofp.f64.i16(i16 %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
 define double @uifdi(i32 %x) #0 {
 ; X87-LABEL: uifdi:
 ; X87:       # %bb.0: # %entry
@@ -2227,6 +2479,90 @@
   ret double %result
 }
 
+define float @uiffb(i8 %x) #0 {
+; X87-LABEL: uiffb:
+; X87:       # %bb.0: # %entry
+; X87-NEXT:    pushl %eax
+; X87-NEXT:    .cfi_def_cfa_offset 8
+; X87-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X87-NEXT:    movw %ax, {{[0-9]+}}(%esp)
+; X87-NEXT:    filds {{[0-9]+}}(%esp)
+; X87-NEXT:    popl %eax
+; X87-NEXT:    .cfi_def_cfa_offset 4
+; X87-NEXT:    retl
+;
+; X86-SSE-LABEL: uiffb:
+; X86-SSE:       # %bb.0: # %entry
+; X86-SSE-NEXT:    pushl %eax
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 8
+; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    cvtsi2ss %eax, %xmm0
+; X86-SSE-NEXT:    movss %xmm0, (%esp)
+; X86-SSE-NEXT:    flds (%esp)
+; X86-SSE-NEXT:    popl %eax
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
+; X86-SSE-NEXT:    retl
+;
+; SSE-LABEL: uiffb:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    movzbl %dil, %eax
+; SSE-NEXT:    cvtsi2ss %eax, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: uiffb:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    movzbl %dil, %eax
+; AVX-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %result = call float @llvm.experimental.constrained.uitofp.f32.i8(i8 %x,
+                                              metadata !"round.dynamic",
+                                              metadata !"fpexcept.strict") #0
+  ret float %result
+}
+
+define float @uiffw(i16 %x) #0 {
+; X87-LABEL: uiffw:
+; X87:       # %bb.0: # %entry
+; X87-NEXT:    pushl %eax
+; X87-NEXT:    .cfi_def_cfa_offset 8
+; X87-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X87-NEXT:    movl %eax, (%esp)
+; X87-NEXT:    fildl (%esp)
+; X87-NEXT:    popl %eax
+; X87-NEXT:    .cfi_def_cfa_offset 4
+; X87-NEXT:    retl
+;
+; X86-SSE-LABEL: uiffw:
+; X86-SSE:       # %bb.0: # %entry
+; X86-SSE-NEXT:    pushl %eax
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 8
+; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    cvtsi2ss %eax, %xmm0
+; X86-SSE-NEXT:    movss %xmm0, (%esp)
+; X86-SSE-NEXT:    flds (%esp)
+; X86-SSE-NEXT:    popl %eax
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
+; X86-SSE-NEXT:    retl
+;
+; SSE-LABEL: uiffw:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    movzwl %di, %eax
+; SSE-NEXT:    cvtsi2ss %eax, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: uiffw:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    movzwl %di, %eax
+; AVX-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
+; AVX-NEXT:    retq
entry:
+  %result = call float @llvm.experimental.constrained.uitofp.f32.i16(i16 %x,
+                                              metadata !"round.dynamic",
+                                              metadata !"fpexcept.strict") #0
+  ret float %result
+}
+
 define float @uiffi(i32 %x) #0 {
 ; X87-LABEL: uiffi:
 ; X87:       # %bb.0: # %entry
@@ -2401,11 +2737,19 @@
 declare i32 @llvm.experimental.constrained.lround.i32.f32(float, metadata)
 declare i64 @llvm.experimental.constrained.llround.i64.f64(double, metadata)
 declare i64 @llvm.experimental.constrained.llround.i64.f32(float, metadata)
+declare double @llvm.experimental.constrained.sitofp.f64.i8(i8, metadata, metadata)
+declare double @llvm.experimental.constrained.sitofp.f64.i16(i16, metadata, metadata)
 declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, metadata)
-declare float @llvm.experimental.constrained.sitofp.f32.i32(i32, metadata, metadata)
 declare double @llvm.experimental.constrained.sitofp.f64.i64(i64, metadata, metadata)
+declare float @llvm.experimental.constrained.sitofp.f32.i8(i8, metadata, metadata)
+declare float @llvm.experimental.constrained.sitofp.f32.i16(i16, metadata, metadata)
+declare float @llvm.experimental.constrained.sitofp.f32.i32(i32, metadata, metadata)
 declare float @llvm.experimental.constrained.sitofp.f32.i64(i64, metadata, metadata)
+declare double @llvm.experimental.constrained.uitofp.f64.i8(i8, metadata, metadata)
+declare double @llvm.experimental.constrained.uitofp.f64.i16(i16, metadata, metadata)
 declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata)
 declare double @llvm.experimental.constrained.uitofp.f64.i64(i64, metadata, metadata)
+declare float @llvm.experimental.constrained.uitofp.f32.i8(i8, metadata, metadata)
+declare float @llvm.experimental.constrained.uitofp.f32.i16(i16, metadata, metadata)
 declare float @llvm.experimental.constrained.uitofp.f32.i32(i32, metadata, metadata)
 declare float @llvm.experimental.constrained.uitofp.f32.i64(i64, metadata, metadata)