Index: llvm/include/llvm/CodeGen/TargetLowering.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetLowering.h
+++ llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4121,6 +4121,7 @@
   /// Expand float to UINT conversion
   /// \param N Node to expand
   /// \param Result output after conversion
+  /// \param Chain output chain after conversion
   /// \returns True, if the expansion was successful, false otherwise
   bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain,
                         SelectionDAG &DAG) const;
@@ -4128,6 +4129,7 @@
   /// Expand UINT(i64) to double(f64) conversion
   /// \param N Node to expand
   /// \param Result output after conversion
+  /// \param Chain output chain after conversion
   /// \returns True, if the expansion was successful, false otherwise
   bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain,
                         SelectionDAG &DAG) const;
Index: llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -172,10 +172,10 @@
                            SDValue NewIntValue) const;
   SDValue ExpandFCOPYSIGN(SDNode *Node) const;
   SDValue ExpandFABS(SDNode *Node) const;
-  SDValue ExpandLegalINT_TO_FP(bool isSigned, SDNode *Node, EVT DestVT,
-                               const SDLoc &dl);
-  SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned,
-                                const SDLoc &dl);
+  void ExpandLegalINT_TO_FP(SDNode *Node, const SDLoc &dl,
+                            SmallVectorImpl<SDValue> &Results);
+  void PromoteLegalINT_TO_FP(SDNode *N, const SDLoc &dl,
+                             SmallVectorImpl<SDValue> &Results);
   void PromoteLegalFP_TO_INT(SDNode *N, const SDLoc &dl,
                              SmallVectorImpl<SDValue> &Results);
@@ -2345,12 +2345,15 @@
 /// INT_TO_FP operation of the specified operand when the target requests that
 /// we expand it. At this point, we know that the result and operand types are
 /// legal for the target.
-SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDNode *Node,
-                                                   EVT DestVT,
-                                                   const SDLoc &dl) {
-  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
+void SelectionDAGLegalize::ExpandLegalINT_TO_FP(
+    SDNode *Node, const SDLoc &dl, SmallVectorImpl<SDValue> &Results) {
+  bool IsStrict = Node->isStrictFPOpcode();
+  bool IsSigned = Node->getOpcode() == ISD::SINT_TO_FP ||
+                  Node->getOpcode() == ISD::STRICT_SINT_TO_FP;
+  unsigned OpNo = IsStrict ? 1 : 0;
   SDValue Op0 = Node->getOperand(OpNo);
   EVT SrcVT = Op0.getValueType();
+  EVT DestVT = Node->getValueType(0);
 
   // TODO: Should any fast-math-flags be set for the created nodes?
   LLVM_DEBUG(dbgs() << "Legalizing INT_TO_FP\n");
@@ -2373,7 +2376,7 @@
 
     // if signed map to unsigned space
     SDValue Op0Mapped;
-    if (isSigned) {
+    if (IsSigned) {
       // constant used to invert sign bit (signed to unsigned mapping)
       SDValue SignBit = DAG.getConstant(0x80000000u, dl, MVT::i32);
       Op0Mapped = DAG.getNode(ISD::XOR, dl, MVT::i32, Op0, SignBit);
@@ -2392,43 +2395,40 @@
     SDValue Load =
         DAG.getLoad(MVT::f64, dl, Store2, StackSlot, MachinePointerInfo());
     // FP constant to bias correct the final result
-    SDValue Bias = DAG.getConstantFP(isSigned ?
+    SDValue Bias = DAG.getConstantFP(IsSigned ?
                                      BitsToDouble(0x4330000080000000ULL) :
                                      BitsToDouble(0x4330000000000000ULL),
                                      dl, MVT::f64);
     // subtract the bias
-    SDValue Sub;
-    if (Node->isStrictFPOpcode()) {
-      Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::f64, MVT::Other},
-                        {Node->getOperand(0), Load, Bias});
-    } else
-      Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias);
-    // final result
-    SDValue OldChain = SDValue(Node, 1);
-    SDValue Result, NewChain;
-    if (Node->isStrictFPOpcode()) {
-      if (!DestVT.bitsEq(Sub.getValueType())) {
+    if (IsStrict) {
+      SDValue Result = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::f64, MVT::Other},
+                                   {Node->getOperand(0), Load, Bias});
+      SDValue NewChain = Result.getValue(1);
+
+      if (DestVT != Result.getValueType()) {
         std::pair<SDValue, SDValue> ResultPair;
-        ResultPair = DAG.getStrictFPExtendOrRound(Sub, OldChain, dl, DestVT);
+        ResultPair = DAG.getStrictFPExtendOrRound(Result, NewChain, dl, DestVT);
         Result = ResultPair.first;
         NewChain = ResultPair.second;
       }
-      else
-        Result = Sub;
-      // Finally relink the chain
-      DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
-    } else
-      Result = DAG.getFPExtendOrRound(Sub, dl, DestVT);
-    return Result;
+
+      Results.push_back(Result);
+      Results.push_back(NewChain);
+      return;
+    }
+
+    SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias);
+    Results.push_back(DAG.getFPExtendOrRound(Sub, dl, DestVT));
+    return;
   }
 
-  assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
-  // Code below here assumes !isSigned without checking again.
+  assert(!IsSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
+  // Code below here assumes !IsSigned without checking again.
   SDValue Tmp1;
-  if (Node->isStrictFPOpcode()) {
-    Tmp1 = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, { DestVT, MVT::Other },
-                       { Node->getOperand(0), Op0 });
-  } else
+  if (IsStrict)
+    Tmp1 = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {DestVT, MVT::Other},
+                       {Node->getOperand(0), Op0});
+  else
     Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
 
   SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(SrcVT), Op0,
@@ -2475,15 +2475,15 @@
     FudgeInReg = Handle.getValue();
   }
 
-  if (Node->isStrictFPOpcode()) {
-    SDValue Result = DAG.getNode(ISD::STRICT_FADD, dl, { DestVT, MVT::Other },
-                                 { Tmp1.getValue(1), Tmp1, FudgeInReg });
-    // Relink the chain
-    DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Result.getValue(1));
-    return Result;
+  if (IsStrict) {
+    SDValue Result = DAG.getNode(ISD::STRICT_FADD, dl, {DestVT, MVT::Other},
+                                 {Tmp1.getValue(1), Tmp1, FudgeInReg});
+    Results.push_back(Result);
+    Results.push_back(Result.getValue(1));
+    return;
   }
 
-  return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg);
+  Results.push_back(DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg));
 }
 
 /// This function is responsible for legalizing a
@@ -2491,9 +2491,13 @@
 /// we promote it. At this point, we know that the result and operand types are
 /// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP
 /// operation that takes a larger input.
-SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT,
-                                                    bool isSigned,
-                                                    const SDLoc &dl) {
+void SelectionDAGLegalize::PromoteLegalINT_TO_FP(
+    SDNode *N, const SDLoc &dl, SmallVectorImpl<SDValue> &Results) {
+  bool IsStrict = N->isStrictFPOpcode();
+  bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP ||
+                  N->getOpcode() == ISD::STRICT_SINT_TO_FP;
+  EVT DestVT = N->getValueType(0);
+  SDValue LegalOp = N->getOperand(IsStrict ? 1 : 0);
+
   // First step, figure out the appropriate *INT_TO_FP operation to use.
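+  // For strict nodes, the converted value is pushed first and the updated
+  // chain second; the caller uses these to replace the node's two results.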
   EVT NewInTy = LegalOp.getValueType();
 
@@ -2505,26 +2509,34 @@
     assert(NewInTy.isInteger() && "Ran out of possibilities!");
 
     // If the target supports SINT_TO_FP of this type, use it.
-    if (TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, NewInTy)) {
-      OpToUse = ISD::SINT_TO_FP;
+    OpToUse = IsStrict ? ISD::STRICT_SINT_TO_FP : ISD::SINT_TO_FP;
+    if (TLI.isOperationLegalOrCustom(OpToUse, NewInTy))
       break;
-    }
-    if (isSigned) continue;
+    if (IsSigned) continue;
 
     // If the target supports UINT_TO_FP of this type, use it.
-    if (TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, NewInTy)) {
-      OpToUse = ISD::UINT_TO_FP;
+    OpToUse = IsStrict ? ISD::STRICT_UINT_TO_FP : ISD::UINT_TO_FP;
+    if (TLI.isOperationLegalOrCustom(OpToUse, NewInTy))
       break;
-    }
 
     // Otherwise, try a larger type.
   }
 
-  // Okay, we found the operation and type to use.  Zero extend our input to the
+  // Okay, we found the operation and type to use. Extend our input to the
   // desired type then run the operation on it.
-  return DAG.getNode(OpToUse, dl, DestVT,
-                     DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
-                                 dl, NewInTy, LegalOp));
+  SDValue Ext = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+                            NewInTy, LegalOp);
+
+  SDValue Operation;
+  if (IsStrict)
+    Operation =
+        DAG.getNode(OpToUse, dl, {DestVT, MVT::Other}, {N->getOperand(0), Ext});
+  else
+    Operation = DAG.getNode(OpToUse, dl, DestVT, Ext);
+
+  Results.push_back(Operation);
+  if (IsStrict)
+    Results.push_back(Operation.getValue(1));
 }
 
 /// This function is responsible for legalizing a
@@ -2943,10 +2955,8 @@
   case ISD::STRICT_UINT_TO_FP:
     if (TLI.expandUINT_TO_FP(Node, Tmp1, Tmp2, DAG)) {
       if (Node->isStrictFPOpcode()) {
-        // Relink the chain.
-        DAG.ReplaceAllUsesOfValueWith(SDValue(Node,1), Tmp2);
-        // Replace the new UINT result.
-        ReplaceNodeWithValue(SDValue(Node, 0), Tmp1);
+        Results.push_back(Tmp1);
+        Results.push_back(Tmp2);
         LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_UINT_TO_FP node\n");
       } else
         Results.push_back(Tmp1);
@@ -2955,16 +2965,7 @@
     LLVM_FALLTHROUGH;
   case ISD::SINT_TO_FP:
   case ISD::STRICT_SINT_TO_FP:
-    if (Node->isStrictFPOpcode()) {
-      Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::STRICT_SINT_TO_FP,
-                                  Node, Node->getValueType(0), dl);
-      ReplaceNode(Node, Tmp1.getNode());
-      LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_xINT_TO_FP node\n");
-      return true;
-    }
-    Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::SINT_TO_FP,
-                                Node, Node->getValueType(0), dl);
-    Results.push_back(Tmp1);
+    ExpandLegalINT_TO_FP(Node, dl, Results);
     break;
   case ISD::FP_TO_SINT:
     if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG))
@@ -4231,10 +4232,12 @@
       Node->getOpcode() == ISD::SINT_TO_FP ||
       Node->getOpcode() == ISD::SETCC ||
       Node->getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
-      Node->getOpcode() == ISD::INSERT_VECTOR_ELT) {
+      Node->getOpcode() == ISD::INSERT_VECTOR_ELT)
     OVT = Node->getOperand(0).getSimpleValueType();
-  }
-  if (Node->getOpcode() == ISD::BR_CC)
+  else if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP ||
+           Node->getOpcode() == ISD::STRICT_SINT_TO_FP)
+    OVT = Node->getOperand(1).getSimpleValueType();
+  else if (Node->getOpcode() == ISD::BR_CC)
    OVT = Node->getOperand(2).getSimpleValueType();
   MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
   SDLoc dl(Node);
@@ -4288,10 +4291,10 @@
     PromoteLegalFP_TO_INT(Node, dl, Results);
     break;
   case ISD::UINT_TO_FP:
+  case ISD::STRICT_UINT_TO_FP:
   case ISD::SINT_TO_FP:
-    Tmp1 = PromoteLegalINT_TO_FP(Node->getOperand(0), Node->getValueType(0),
-                                 Node->getOpcode() == ISD::SINT_TO_FP, dl);
-    Results.push_back(Tmp1);
+  case ISD::STRICT_SINT_TO_FP:
+    PromoteLegalINT_TO_FP(Node, dl, Results);
     break;
   case ISD::VAARG: {
     SDValue Chain = Node->getOperand(0); // Get the chain.
Index: llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -3082,7 +3082,7 @@
   case ISD::STRICT_FP_TO_UINT:
   case ISD::STRICT_SINT_TO_FP:
   case ISD::STRICT_UINT_TO_FP:
-    return WidenVecRes_Convert_StrictFP(N);
+    return WidenVecRes_Convert_StrictFP(N);
   default:
     break;
   }
Index: llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -6148,8 +6148,8 @@
   if (Node->isStrictFPOpcode()) {
     SDValue SignCvt = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl,
                                   {DstVT, MVT::Other}, {Chain, Or});
-    Slow = DAG.getNode(ISD::STRICT_FADD, dl, { DstVT, MVT::Other },
-                       { SignCvt.getValue(1), SignCvt, SignCvt });
+    Slow = DAG.getNode(ISD::STRICT_FADD, dl, {DstVT, MVT::Other},
+                       {SignCvt.getValue(1), SignCvt, SignCvt});
     Chain = Slow.getValue(1);
   } else {
     SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or);
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -228,8 +228,10 @@
   if (!Subtarget.useSoftFloat()) {
     // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
     // operation.
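+    // Register the STRICT_ variants with the same action so constrained
+    // conversions take the same promotion path.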
-    setOperationAction(ISD::UINT_TO_FP,        MVT::i8,   Promote);
-    setOperationAction(ISD::UINT_TO_FP,        MVT::i16,  Promote);
+    setOperationAction(ISD::UINT_TO_FP,        MVT::i8,  Promote);
+    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i8,  Promote);
+    setOperationAction(ISD::UINT_TO_FP,        MVT::i16, Promote);
+    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i16, Promote);
     // We have an algorithm for SSE2, and we turn this into a 64-bit
     // FILD or VCVTUSI2SS/SD for other targets.
     setOperationAction(ISD::UINT_TO_FP,        MVT::i32,  Custom);
@@ -241,16 +243,18 @@
 
     // Promote i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
     // this operation.
-    setOperationAction(ISD::SINT_TO_FP,        MVT::i8,   Promote);
+    setOperationAction(ISD::SINT_TO_FP,        MVT::i8,  Promote);
+    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i8,  Promote);
     // SSE has no i16 to fp conversion, only i32. We promote in the handler
     // to allow f80 to use i16 and f64 to use i16 with sse1 only
-    setOperationAction(ISD::SINT_TO_FP,        MVT::i16,  Custom);
+    setOperationAction(ISD::SINT_TO_FP,        MVT::i16, Custom);
+    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i16, Custom);
     // f32 and f64 cases are Legal with SSE1/SSE2, f80 case is not
-    setOperationAction(ISD::SINT_TO_FP,        MVT::i32,  Custom);
+    setOperationAction(ISD::SINT_TO_FP,        MVT::i32, Custom);
     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
     // In 32-bit mode these are custom lowered.  In 64-bit mode F32 and F64
     // are Legal, f80 is custom lowered.
-    setOperationAction(ISD::SINT_TO_FP,        MVT::i64,  Custom);
+    setOperationAction(ISD::SINT_TO_FP,        MVT::i64, Custom);
     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
 
     // Promote i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
@@ -18423,7 +18427,8 @@
           Op.getOpcode() == ISD::STRICT_UINT_TO_FP ||
           Op.getOpcode() == ISD::UINT_TO_FP) &&
          "Unexpected opcode!");
-  unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0;
+  bool IsStrict = Op->isStrictFPOpcode();
+  unsigned OpNo = IsStrict ? 1 : 0;
   SDValue Src = Op.getOperand(OpNo);
   MVT SrcVT = Src.getSimpleValueType();
   MVT VT = Op.getSimpleValueType();
@@ -18441,13 +18446,13 @@
   SDLoc dl(Op);
   SDValue InVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecInVT, Src);
-  if (Op.getNode()->isStrictFPOpcode()) {
+  if (IsStrict) {
     SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, {VecVT, MVT::Other},
                                  {Op.getOperand(0), InVec});
-    SDValue Chain = SDValue(CvtVec.getNode(), 1);
+    SDValue Chain = CvtVec.getValue(1);
     SDValue Value = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,
                                 DAG.getIntPtrConstant(0, dl));
-    return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Value, Chain);
+    return DAG.getMergeValues({Value, Chain}, dl);
   }
 
   SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, VecVT, InVec);
@@ -18522,7 +18527,8 @@
 SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
                                            SelectionDAG &DAG) const {
-  unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0;
+  bool IsStrict = Op->isStrictFPOpcode();
+  unsigned OpNo = IsStrict ? 1 : 0;
   SDValue Src = Op.getOperand(OpNo);
   MVT SrcVT = Src.getSimpleValueType();
   MVT VT = Op.getSimpleValueType();
@@ -18533,7 +18539,7 @@
 
   if (SrcVT.isVector()) {
     if (SrcVT == MVT::v2i32 && VT == MVT::v2f64 &&
-        !Op.getNode()->isStrictFPOpcode()) {
+        !IsStrict) { // FIXME: Strict FP!
       return DAG.getNode(X86ISD::CVTSI2P, dl, VT,
                          DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32,
                                      Src, DAG.getUNDEF(SrcVT)));
@@ -18559,28 +18565,38 @@
 
   // SSE doesn't have an i16 conversion so we need to promote.
   if (SrcVT == MVT::i16 && (UseSSEReg || VT == MVT::f128)) {
     SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Src);
+    if (IsStrict)
+      return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},
+                         {Op.getOperand(0), Ext});
+
     return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Ext);
   }
 
   if (VT == MVT::f128)
     return LowerF128Call(Op, DAG, RTLIB::getSINTTOFP(SrcVT, VT));
 
-  SDValue ValueToStore = Op.getOperand(OpNo);
+  SDValue ValueToStore = Src;
   if (SrcVT == MVT::i64 && UseSSEReg && !Subtarget.is64Bit())
     // Bitcasting to f64 here allows us to do a single 64-bit store from
     // an SSE register, avoiding the store forwarding penalty that would come
     // with two 32-bit stores.
     ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore);
 
+  // FIXME: Support strict FP!
   unsigned Size = SrcVT.getSizeInBits()/8;
   MachineFunction &MF = DAG.getMachineFunction();
   auto PtrVT = getPointerTy(MF.getDataLayout());
   int SSFI = MF.getFrameInfo().CreateStackObject(Size, Size, false);
   SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
-  SDValue Chain = DAG.getStore(
-      DAG.getEntryNode(), dl, ValueToStore, StackSlot,
+  SDValue Chain = IsStrict ? Op->getOperand(0) : DAG.getEntryNode();
+  Chain = DAG.getStore(
+      Chain, dl, ValueToStore, StackSlot,
       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI));
-  return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG).first;
+  std::pair<SDValue, SDValue> Tmp = BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
+  if (IsStrict)
+    return DAG.getMergeValues({Tmp.first, Tmp.second}, dl);
+
+  return Tmp.first;
 }
 
 std::pair<SDValue, SDValue> X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
@@ -18668,7 +18684,8 @@
 #endif
   */
 
-  unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0;
+  bool IsStrict = Op->isStrictFPOpcode();
+  unsigned OpNo = IsStrict ? 1 : 0;
   SDLoc dl(Op);
   LLVMContext *Context = DAG.getContext();
@@ -18706,7 +18723,7 @@
   SDValue Sub;
   SDValue Chain;
   // TODO: Are there any fast-math-flags to propagate here?
-  if (Op.getNode()->isStrictFPOpcode()) {
+  if (IsStrict) {
     Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::v2f64, MVT::Other},
                       {Op.getOperand(0), XR2F, CLod1});
     Chain = SDValue(Sub.getNode(), 1);
@@ -18714,26 +18731,26 @@
     Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
 
   SDValue Result;
-  if (Subtarget.hasSSE3() && shouldUseHorizontalOp(true, DAG, Subtarget)) {
+  if (!IsStrict && Subtarget.hasSSE3() &&
+      shouldUseHorizontalOp(true, DAG, Subtarget)) {
     // FIXME: Do we need a STRICT version of FHADD?
     Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub);
   } else {
     SDValue Shuffle = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, Sub, {1,-1});
-    if (Op.getNode()->isStrictFPOpcode()) {
+    if (IsStrict) {
       Result = DAG.getNode(ISD::STRICT_FADD, dl, {MVT::v2f64, MVT::Other},
                            {Chain, Shuffle, Sub});
-      Chain = SDValue(Result.getNode(), 1);
+      Chain = Result.getValue(1);
     } else
       Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuffle, Sub);
   }
-  if (Op.getNode()->isStrictFPOpcode()) {
-    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
-                         DAG.getIntPtrConstant(0, dl));
-    return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, Chain);
-  }
-  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
-                     DAG.getIntPtrConstant(0, dl));
+  Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
+                       DAG.getIntPtrConstant(0, dl));
+  if (IsStrict)
+    return DAG.getMergeValues({Result, Chain}, dl);
+
+  return Result;
 }
 
 /// 32-bit unsigned integer to float expansion.
@@ -18771,14 +18788,17 @@
   // Subtract the bias.
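+  // At this point 'Or' holds an f64 whose bits are 0x43300000'XXXXXXXX,
+  // i.e. 2^52 + x for the 32-bit input x; subtracting the 2^52 bias leaves
+  // exactly (double)x.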
   // TODO: Are there any fast-math-flags to propagate here?
   SDValue Chain = Op.getOperand(0);
-  SDValue Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::f64, MVT::Other},
+  SDValue Res = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::f64, MVT::Other},
                             {Chain, Or, Bias});
   // Handle final rounding.
-  return DAG
-      .getStrictFPExtendOrRound(Sub, SDValue(Sub.getNode(), 1), dl,
-                                Op.getSimpleValueType())
-      .first;
+  if (Op.getSimpleValueType() == MVT::f64)
+    return Res;
+
+  Chain = Res.getValue(1);
+  std::pair<SDValue, SDValue> Tmp =
+      DAG.getStrictFPExtendOrRound(Res, Chain, dl, Op.getSimpleValueType());
+  return DAG.getMergeValues({Tmp.first, Tmp.second}, dl);
   }
 
   // Subtract the bias.
@@ -18795,6 +18815,7 @@
   if (Op.getSimpleValueType() != MVT::v2f64)
     return SDValue();
 
+  // FIXME: Support strict FP!
   if (Op.getNode()->isStrictFPOpcode())
     return SDValue();
@@ -18943,14 +18964,14 @@
 SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
                                            SelectionDAG &DAG) const {
-  unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0;
-  SDValue N0 = Op.getOperand(OpNo);
+  bool IsStrict = Op->isStrictFPOpcode();
+  unsigned OpNo = IsStrict ? 1 : 0;
+  SDValue Src = Op.getOperand(OpNo);
   SDLoc dl(Op);
   auto PtrVT = getPointerTy(DAG.getDataLayout());
-  MVT SrcVT = N0.getSimpleValueType();
+  MVT SrcVT = Src.getSimpleValueType();
   MVT DstVT = Op.getSimpleValueType();
-  SDValue Chain =
-      Op.getNode()->isStrictFPOpcode() ? Op.getOperand(0) : DAG.getEntryNode();
+  SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
 
   if (DstVT == MVT::f128)
     return LowerF128Call(Op, DAG, RTLIB::getUINTTOFP(SrcVT, DstVT));
@@ -18970,12 +18991,12 @@
   // Promote i32 to i64 and use a signed conversion on 64-bit targets.
   if (SrcVT == MVT::i32 && Subtarget.is64Bit()) {
-    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, N0);
-    if (Op.getNode()->isStrictFPOpcode()) {
+    Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Src);
+    if (IsStrict)
       return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {DstVT, MVT::Other},
-                         {Chain, N0});
-    }
-    return DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, N0);
+                         {Chain, Src});
+
+    return DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);
   }
 
   if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget))
@@ -18992,20 +19013,26 @@
   SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64);
   if (SrcVT == MVT::i32) {
     SDValue OffsetSlot = DAG.getMemBasePlusOffset(StackSlot, 4, dl);
-    SDValue Store1 = DAG.getStore(Chain, dl, Op.getOperand(OpNo), StackSlot,
-                                  MachinePointerInfo());
+    SDValue Store1 =
+        DAG.getStore(Chain, dl, Src, StackSlot, MachinePointerInfo());
     SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, dl, MVT::i32),
                                   OffsetSlot, MachinePointerInfo());
-    return BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG).first;
+    std::pair<SDValue, SDValue> Tmp =
+        BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
+    if (IsStrict)
+      return DAG.getMergeValues({Tmp.first, Tmp.second}, dl);
+
+    return Tmp.first;
   }
   assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
-  SDValue ValueToStore = Op.getOperand(OpNo);
-  if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget.is64Bit())
+  SDValue ValueToStore = Src;
+  if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget.is64Bit()) {
     // Bitcasting to f64 here allows us to do a single 64-bit store from
     // an SSE register, avoiding the store forwarding penalty that would come
     // with two 32-bit stores.
     ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore);
+  }
   SDValue Store =
       DAG.getStore(Chain, dl, ValueToStore, StackSlot, MachinePointerInfo());
   // For i64 source, we need to add the appropriate power of 2 if the input
@@ -19022,7 +19049,7 @@
   SDValue Ops[] = { Store, StackSlot };
   SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops,
                                          MVT::i64, MMO);
-  Chain = SDValue(Fild.getNode(), 1);
+  Chain = Fild.getValue(1);
 
   APInt FF(32, 0x5F800000ULL);
@@ -19047,15 +19074,15 @@
       ISD::EXTLOAD, dl, MVT::f80, Chain, FudgePtr,
       MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32,
       /* Alignment = */ 4);
-  Chain = SDValue(Fudge.getNode(), 1);
+  Chain = Fudge.getValue(1);
   // Extend everything to 80 bits to force it to be done on x87.
   // TODO: Are there any fast-math-flags to propagate here?
-  if (Op.getNode()->isStrictFPOpcode()) {
+  if (IsStrict) {
     SDValue Add = DAG.getNode(ISD::STRICT_FADD, dl, {MVT::f80, MVT::Other},
                               {Chain, Fild, Fudge});
-    return DAG.getNode(
-        ISD::STRICT_FP_ROUND, dl, {DstVT, MVT::Other},
-        {SDValue(Add.getNode(), 1), Add, DAG.getIntPtrConstant(0, dl)});
+    Chain = Add.getValue(1);
+    return DAG.getNode(ISD::STRICT_FP_ROUND, dl, {DstVT, MVT::Other},
+                       {Chain, Add, DAG.getIntPtrConstant(0, dl)});
   }
   SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
   return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add,
@@ -19110,10 +19137,7 @@
   int SSFI = MF.getFrameInfo().CreateStackObject(MemSize, MemSize, false);
   SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
 
-  if (IsStrict)
-    Chain = Op.getOperand(0);
-  else
-    Chain = DAG.getEntryNode();
+  Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
 
   SDValue Adjust; // 0x0 or 0x80000000, for result sign bit adjustment.
Index: llvm/test/CodeGen/X86/fp-intrinsics.ll
===================================================================
--- llvm/test/CodeGen/X86/fp-intrinsics.ll
+++ llvm/test/CodeGen/X86/fp-intrinsics.ll
@@ -1951,8 +1951,92 @@
 }
 
 ; Verify that sitofp(%x) isn't simplified when the rounding mode is
-; unknown. The expansion should have only one conversion instruction.
+; unknown.
 ; Verify that no gross errors happen.
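+; The new i8/i16 cases rely on the promotion added in this patch: a single
+; sign/zero extension feeding one conversion instruction.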
+define double @sifdb(i8 %x) #0 {
+; X87-LABEL: sifdb:
+; X87: # %bb.0: # %entry
+; X87-NEXT: pushl %eax
+; X87-NEXT: .cfi_def_cfa_offset 8
+; X87-NEXT: movsbl {{[0-9]+}}(%esp), %eax
+; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; X87-NEXT: filds {{[0-9]+}}(%esp)
+; X87-NEXT: popl %eax
+; X87-NEXT: .cfi_def_cfa_offset 4
+; X87-NEXT: retl
+;
+; X86-SSE-LABEL: sifdb:
+; X86-SSE: # %bb.0: # %entry
+; X86-SSE-NEXT: subl $12, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 16
+; X86-SSE-NEXT: movsbl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT: cvtsi2sd %eax, %xmm0
+; X86-SSE-NEXT: movsd %xmm0, (%esp)
+; X86-SSE-NEXT: fldl (%esp)
+; X86-SSE-NEXT: addl $12, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 4
+; X86-SSE-NEXT: retl
+;
+; SSE-LABEL: sifdb:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: movsbl %dil, %eax
+; SSE-NEXT: cvtsi2sd %eax, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: sifdb:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: movsbl %dil, %eax
+; AVX-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+  %result = call double @llvm.experimental.constrained.sitofp.f64.i8(i8 %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
+define double @sifdw(i16 %x) #0 {
+; X87-LABEL: sifdw:
+; X87: # %bb.0: # %entry
+; X87-NEXT: pushl %eax
+; X87-NEXT: .cfi_def_cfa_offset 8
+; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; X87-NEXT: filds {{[0-9]+}}(%esp)
+; X87-NEXT: popl %eax
+; X87-NEXT: .cfi_def_cfa_offset 4
+; X87-NEXT: retl
+;
+; X86-SSE-LABEL: sifdw:
+; X86-SSE: # %bb.0: # %entry
+; X86-SSE-NEXT: subl $12, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 16
+; X86-SSE-NEXT: movswl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT: cvtsi2sd %eax, %xmm0
+; X86-SSE-NEXT: movsd %xmm0, (%esp)
+; X86-SSE-NEXT: fldl (%esp)
+; X86-SSE-NEXT: addl $12, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 4
+; X86-SSE-NEXT: retl
+;
+; SSE-LABEL: sifdw:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: movswl %di, %eax
+; SSE-NEXT: cvtsi2sd %eax, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: sifdw:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: movswl %di, %eax
+; AVX-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
+; AVX-NEXT: retq
entry:
+  %result = call double @llvm.experimental.constrained.sitofp.f64.i16(i16 %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
 define double @sifdi(i32 %x) #0 {
 ; X87-LABEL: sifdi:
 ; X87: # %bb.0: # %entry
@@ -1992,6 +2076,90 @@
   ret double %result
 }
 
+define float @siffb(i8 %x) #0 {
+; X87-LABEL: siffb:
+; X87: # %bb.0: # %entry
+; X87-NEXT: pushl %eax
+; X87-NEXT: .cfi_def_cfa_offset 8
+; X87-NEXT: movsbl {{[0-9]+}}(%esp), %eax
+; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; X87-NEXT: filds {{[0-9]+}}(%esp)
+; X87-NEXT: popl %eax
+; X87-NEXT: .cfi_def_cfa_offset 4
+; X87-NEXT: retl
+;
+; X86-SSE-LABEL: siffb:
+; X86-SSE: # %bb.0: # %entry
+; X86-SSE-NEXT: pushl %eax
+; X86-SSE-NEXT: .cfi_def_cfa_offset 8
+; X86-SSE-NEXT: movsbl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT: cvtsi2ss %eax, %xmm0
+; X86-SSE-NEXT: movss %xmm0, (%esp)
+; X86-SSE-NEXT: flds (%esp)
+; X86-SSE-NEXT: popl %eax
+; X86-SSE-NEXT: .cfi_def_cfa_offset 4
+; X86-SSE-NEXT: retl
+;
+; SSE-LABEL: siffb:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: movsbl %dil, %eax
+; SSE-NEXT: cvtsi2ss %eax, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: siffb:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: movsbl %dil, %eax
+; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+  %result = call float @llvm.experimental.constrained.sitofp.f32.i8(i8 %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret float %result
+}
+
+define float @siffw(i16 %x) #0 {
+; X87-LABEL: siffw:
+; X87: # %bb.0: # %entry
+; X87-NEXT: pushl %eax
+; X87-NEXT: .cfi_def_cfa_offset 8
+; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; X87-NEXT: filds {{[0-9]+}}(%esp)
+; X87-NEXT: popl %eax
+; X87-NEXT: .cfi_def_cfa_offset 4
+; X87-NEXT: retl
+;
+; X86-SSE-LABEL: siffw:
+; X86-SSE: # %bb.0: # %entry
+; X86-SSE-NEXT: pushl %eax
+; X86-SSE-NEXT: .cfi_def_cfa_offset 8
+; X86-SSE-NEXT: movswl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT: cvtsi2ss %eax, %xmm0
+; X86-SSE-NEXT: movss %xmm0, (%esp)
+; X86-SSE-NEXT: flds (%esp)
+; X86-SSE-NEXT: popl %eax
+; X86-SSE-NEXT: .cfi_def_cfa_offset 4
+; X86-SSE-NEXT: retl
+;
+; SSE-LABEL: siffw:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: movswl %di, %eax
+; SSE-NEXT: cvtsi2ss %eax, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: siffw:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: movswl %di, %eax
+; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+  %result = call float @llvm.experimental.constrained.sitofp.f32.i16(i16 %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret float %result
+}
+
 define float @siffi(i32 %x) #0 {
 ; X87-LABEL: siffi:
 ; X87: # %bb.0: # %entry
@@ -2118,8 +2286,92 @@
 }
 
 ; Verify that uitofp(%x) isn't simplified when the rounding mode is
-; unknown. Expansions from i32 should have only one conversion instruction.
+; unknown.
 ; Verify that no gross errors happen.
+define double @uifdb(i8 %x) #0 {
+; X87-LABEL: uifdb:
+; X87: # %bb.0: # %entry
+; X87-NEXT: pushl %eax
+; X87-NEXT: .cfi_def_cfa_offset 8
+; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; X87-NEXT: filds {{[0-9]+}}(%esp)
+; X87-NEXT: popl %eax
+; X87-NEXT: .cfi_def_cfa_offset 4
+; X87-NEXT: retl
+;
+; X86-SSE-LABEL: uifdb:
+; X86-SSE: # %bb.0: # %entry
+; X86-SSE-NEXT: subl $12, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 16
+; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT: cvtsi2sd %eax, %xmm0
+; X86-SSE-NEXT: movsd %xmm0, (%esp)
+; X86-SSE-NEXT: fldl (%esp)
+; X86-SSE-NEXT: addl $12, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 4
+; X86-SSE-NEXT: retl
+;
+; SSE-LABEL: uifdb:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: movzbl %dil, %eax
+; SSE-NEXT: cvtsi2sd %eax, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: uifdb:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: movzbl %dil, %eax
+; AVX-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+  %result = call double @llvm.experimental.constrained.uitofp.f64.i8(i8 %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
+define double @uifdw(i16 %x) #0 {
+; X87-LABEL: uifdw:
+; X87: # %bb.0: # %entry
+; X87-NEXT: pushl %eax
+; X87-NEXT: .cfi_def_cfa_offset 8
+; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X87-NEXT: movl %eax, (%esp)
+; X87-NEXT: fildl (%esp)
+; X87-NEXT: popl %eax
+; X87-NEXT: .cfi_def_cfa_offset 4
+; X87-NEXT: retl
+;
+; X86-SSE-LABEL: uifdw:
+; X86-SSE: # %bb.0: # %entry
+; X86-SSE-NEXT: subl $12, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 16
+; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT: cvtsi2sd %eax, %xmm0
+; X86-SSE-NEXT: movsd %xmm0, (%esp)
+; X86-SSE-NEXT: fldl (%esp)
+; X86-SSE-NEXT: addl $12, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 4
+; X86-SSE-NEXT: retl
+;
+; SSE-LABEL: uifdw:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: movzwl %di, %eax
+; SSE-NEXT: cvtsi2sd %eax, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: uifdw:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: movzwl %di, %eax
+; AVX-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+  %result = call double @llvm.experimental.constrained.uitofp.f64.i16(i16 %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
 define double @uifdi(i32 %x) #0 {
 ; X87-LABEL: uifdi:
 ; X87: # %bb.0: # %entry
@@ -2236,6 +2488,90 @@
   ret double %result
 }
 
+define float @uiffb(i8 %x) #0 {
+; X87-LABEL: uiffb:
+; X87: # %bb.0: # %entry
+; X87-NEXT: pushl %eax
+; X87-NEXT: .cfi_def_cfa_offset 8
+; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X87-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; X87-NEXT: filds {{[0-9]+}}(%esp)
+; X87-NEXT: popl %eax
+; X87-NEXT: .cfi_def_cfa_offset 4
+; X87-NEXT: retl
+;
+; X86-SSE-LABEL: uiffb:
+; X86-SSE: # %bb.0: # %entry
+; X86-SSE-NEXT: pushl %eax
+; X86-SSE-NEXT: .cfi_def_cfa_offset 8
+; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT: cvtsi2ss %eax, %xmm0
+; X86-SSE-NEXT: movss %xmm0, (%esp)
+; X86-SSE-NEXT: flds (%esp)
+; X86-SSE-NEXT: popl %eax
+; X86-SSE-NEXT: .cfi_def_cfa_offset 4
+; X86-SSE-NEXT: retl
+;
+; SSE-LABEL: uiffb:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: movzbl %dil, %eax
+; SSE-NEXT: cvtsi2ss %eax, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: uiffb:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: movzbl %dil, %eax
+; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+  %result = call float @llvm.experimental.constrained.uitofp.f32.i8(i8 %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret float %result
+}
+
+define float @uiffw(i16 %x) #0 {
+; X87-LABEL: uiffw:
+; X87: # %bb.0: # %entry
+; X87-NEXT: pushl %eax
+; X87-NEXT: .cfi_def_cfa_offset 8
+; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X87-NEXT: movl %eax, (%esp)
+; X87-NEXT: fildl (%esp)
+; X87-NEXT: popl %eax
+; X87-NEXT: .cfi_def_cfa_offset 4
+; X87-NEXT: retl
+;
+; X86-SSE-LABEL: uiffw:
+; X86-SSE: # %bb.0: # %entry
+; X86-SSE-NEXT: pushl %eax
+; X86-SSE-NEXT: .cfi_def_cfa_offset 8
+; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT: cvtsi2ss %eax, %xmm0
+; X86-SSE-NEXT: movss %xmm0, (%esp)
+; X86-SSE-NEXT: flds (%esp)
+; X86-SSE-NEXT: popl %eax
+; X86-SSE-NEXT: .cfi_def_cfa_offset 4
+; X86-SSE-NEXT: retl
+;
+; SSE-LABEL: uiffw:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: movzwl %di, %eax
+; SSE-NEXT: cvtsi2ss %eax, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: uiffw:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: movzwl %di, %eax
+; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+  %result = call float @llvm.experimental.constrained.uitofp.f32.i16(i16 %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret float %result
+}
+
 define float @uiffi(i32 %x) #0 {
 ; X87-LABEL: uiffi:
 ; X87: # %bb.0: # %entry
@@ -2330,11 +2666,11 @@
 ; SSE-LABEL: uiffl:
 ; SSE: # %bb.0: # %entry
 ; SSE-NEXT: testq %rdi, %rdi
-; SSE-NEXT: js .LBB42_1
+; SSE-NEXT: js .LBB52_1
 ; SSE-NEXT: # %bb.2: # %entry
 ; SSE-NEXT: cvtsi2ss %rdi, %xmm0
 ; SSE-NEXT: retq
-; SSE-NEXT: .LBB42_1:
+; SSE-NEXT: .LBB52_1:
 ; SSE-NEXT: movq %rdi, %rax
 ; SSE-NEXT: shrq %rax
 ; SSE-NEXT: andl $1, %edi
@@ -2346,11 +2682,11 @@
 ; AVX1-LABEL: uiffl:
 ; AVX1: # %bb.0: # %entry
 ; AVX1-NEXT: testq %rdi, %rdi
-; AVX1-NEXT: js .LBB42_1
+; AVX1-NEXT: js .LBB52_1
 ; AVX1-NEXT: # %bb.2: # %entry
 ; AVX1-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
 ; AVX1-NEXT: retq
-; AVX1-NEXT: .LBB42_1:
+; AVX1-NEXT: .LBB52_1:
 ; AVX1-NEXT: movq %rdi, %rax
 ; AVX1-NEXT: shrq %rax
 ; AVX1-NEXT: andl $1, %edi
@@ -2410,11 +2746,19 @@
 declare i32 @llvm.experimental.constrained.lround.i32.f32(float, metadata)
 declare i64 @llvm.experimental.constrained.llround.i64.f64(double, metadata)
 declare i64 @llvm.experimental.constrained.llround.i64.f32(float, metadata)
+declare double @llvm.experimental.constrained.sitofp.f64.i8(i8, metadata, metadata)
+declare double @llvm.experimental.constrained.sitofp.f64.i16(i16, metadata, metadata)
 declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, metadata)
-declare float @llvm.experimental.constrained.sitofp.f32.i32(i32, metadata, metadata)
 declare double @llvm.experimental.constrained.sitofp.f64.i64(i64, metadata, metadata)
+declare float @llvm.experimental.constrained.sitofp.f32.i8(i8, metadata, metadata)
+declare float @llvm.experimental.constrained.sitofp.f32.i16(i16, metadata, metadata)
+declare float @llvm.experimental.constrained.sitofp.f32.i32(i32, metadata, metadata)
 declare float @llvm.experimental.constrained.sitofp.f32.i64(i64, metadata, metadata)
+declare double @llvm.experimental.constrained.uitofp.f64.i8(i8, metadata, metadata)
+declare double @llvm.experimental.constrained.uitofp.f64.i16(i16, metadata, metadata)
 declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata)
 declare double @llvm.experimental.constrained.uitofp.f64.i64(i64, metadata, metadata)
+declare float @llvm.experimental.constrained.uitofp.f32.i8(i8, metadata, metadata)
+declare float @llvm.experimental.constrained.uitofp.f32.i16(i16, metadata, metadata)
 declare float @llvm.experimental.constrained.uitofp.f32.i32(i32, metadata, metadata)
 declare float @llvm.experimental.constrained.uitofp.f32.i64(i64, metadata, metadata)
Index: llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
===================================================================
--- llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
+++ llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -5771,17 +5771,17 @@
 ; CHECK-NEXT: movd %xmm0, %eax
 ; CHECK-NEXT: xorps %xmm0, %xmm0
 ; CHECK-NEXT: cvtsi2sd %eax, %xmm0
-; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movapd %xmm1, %xmm0
 ; CHECK-NEXT: retq
 ;
 ; AVX-LABEL: constrained_vector_sitofp_v2f64_v2i32:
 ; AVX: # %bb.0: # %entry
-; AVX-NEXT: vpextrd $1, %xmm0, %eax
+; AVX-NEXT: vextractps $1, %xmm0, %eax
 ; AVX-NEXT: vcvtsi2sd %eax, %xmm1, %xmm1
 ; AVX-NEXT: vmovd %xmm0, %eax
 ; AVX-NEXT: vcvtsi2sd %eax, %xmm2, %xmm0
-; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT: retq
 entry:
   %result = call <2 x double>
@@ -5806,7 +5806,7 @@
 ;
 ; AVX-LABEL: constrained_vector_sitofp_v2f32_v2i32:
 ; AVX: # %bb.0: # %entry
-; AVX-NEXT: vpextrd $1, %xmm0, %eax
+; AVX-NEXT: vextractps $1, %xmm0, %eax
 ; AVX-NEXT: vcvtsi2ss %eax, %xmm1, %xmm1
 ; AVX-NEXT: vmovd %xmm0, %eax
 ; AVX-NEXT: vcvtsi2ss %eax, %xmm2, %xmm0
@@ -5829,8 +5829,8 @@
 ; CHECK-NEXT: movq %xmm0, %rax
 ; CHECK-NEXT: xorps %xmm0, %xmm0
 ; CHECK-NEXT: cvtsi2sd %rax, %xmm0
-; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movapd %xmm1, %xmm0
 ; CHECK-NEXT: retq
 ;
 ; AVX-LABEL: constrained_vector_sitofp_v2f64_v2i64:
@@ -5839,7 +5839,7 @@
 ; AVX-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
 ; AVX-NEXT: vmovq %xmm0, %rax
 ; AVX-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
-; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT: retq
 entry:
   %result = call <2 x double>
@@ -5893,16 +5893,16 @@
 ; CHECK-NEXT: cvtsi2sd %eax, %xmm0
 ; CHECK-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movaps %xmm2, %xmm0
+; CHECK-NEXT: movapd %xmm2, %xmm0
 ; CHECK-NEXT: retq
 ;
 ; AVX-LABEL: constrained_vector_sitofp_v3f64_v3i32:
 ; AVX: # %bb.0: # %entry
-; AVX-NEXT: vpextrd $1, %xmm0, %eax
+; AVX-NEXT: vextractps $1, %xmm0, %eax
 ; AVX-NEXT: vcvtsi2sd %eax, %xmm1, %xmm1
 ; AVX-NEXT: vmovd %xmm0, %eax
 ; AVX-NEXT: vcvtsi2sd %eax, %xmm2, %xmm2
-; AVX-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
 ; AVX-NEXT: vpextrd $2, %xmm0, %eax
 ; AVX-NEXT: vcvtsi2sd %eax, %xmm3, %xmm0
 ; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
@@ -5935,7 +5935,7 @@
 ;
 ; AVX-LABEL: constrained_vector_sitofp_v3f32_v3i32:
 ; AVX: # %bb.0: # %entry
-; AVX-NEXT: vpextrd $1, %xmm0, %eax
+; AVX-NEXT: vextractps $1, %xmm0, %eax
 ; AVX-NEXT: vcvtsi2ss %eax, %xmm1, %xmm1
 ; AVX-NEXT: vmovd %xmm0, %eax
 ; AVX-NEXT: vcvtsi2ss %eax, %xmm2, %xmm2
@@ -5968,7 +5968,7 @@
 ; AVX1-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
 ; AVX1-NEXT: vmovq %xmm0, %rax
 ; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
-; AVX1-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT: vmovq %xmm0, %rax
 ; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
@@ -5981,7 +5981,7 @@
 ; AVX512-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
 ; AVX512-NEXT: vmovq %xmm0, %rax
 ; AVX512-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
-; AVX512-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX512-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
 ; AVX512-NEXT: vmovq %xmm0, %rax
 ; AVX512-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
@@ -6050,7 +6050,7 @@
 ; CHECK-NEXT: movd %xmm1, %eax
 ; CHECK-NEXT: xorps %xmm1, %xmm1
 ; CHECK-NEXT: cvtsi2sd %eax, %xmm1
-; CHECK-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; CHECK-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
 ; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
 ; CHECK-NEXT: movd %xmm1, %eax
 ; CHECK-NEXT: cvtsi2sd %eax, %xmm3
@@ -6058,8 +6058,8 @@
 ; CHECK-NEXT: movd %xmm0, %eax
 ; CHECK-NEXT: xorps %xmm1, %xmm1
 ; CHECK-NEXT: cvtsi2sd %eax, %xmm1
-; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0]
-; CHECK-NEXT: movaps %xmm2, %xmm0
+; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; CHECK-NEXT: movapd %xmm2, %xmm0
 ; CHECK-NEXT: retq
 ;
 ; AVX-LABEL: constrained_vector_sitofp_v4f64_v4i32:
@@ -6101,16 +6101,16 @@
 ; CHECK-NEXT: movq %xmm0, %rax
 ; CHECK-NEXT: xorps %xmm0, %xmm0
 ; CHECK-NEXT: cvtsi2sd %rax, %xmm0
-; CHECK-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm0[0]
+; CHECK-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
 ; CHECK-NEXT: movq %xmm1, %rax
 ; CHECK-NEXT: cvtsi2sd %rax, %xmm3
 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; CHECK-NEXT: movq %xmm0, %rax
 ; CHECK-NEXT: xorps %xmm0, %xmm0
 ; CHECK-NEXT: cvtsi2sd %rax, %xmm0
-; CHECK-NEXT: movlhps {{.*#+}} xmm3 = xmm3[0],xmm0[0]
-; CHECK-NEXT: movaps %xmm2, %xmm0
-; CHECK-NEXT: movaps %xmm3, %xmm1
+; CHECK-NEXT: unpcklpd {{.*#+}} xmm3 = xmm3[0],xmm0[0]
+; CHECK-NEXT: movapd %xmm2, %xmm0
+; CHECK-NEXT: movapd %xmm3, %xmm1
 ; CHECK-NEXT: retq
 ;
 ; AVX1-LABEL: constrained_vector_sitofp_v4f64_v4i64:
@@ -6120,12 +6120,12 @@
 ; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
 ; AVX1-NEXT: vmovq %xmm1, %rax
 ; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
-; AVX1-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax
 ; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
 ; AVX1-NEXT: vmovq %xmm0, %rax
 ; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
-; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
@@ -6136,12 +6136,12 @@
 ; AVX512-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
 ; AVX512-NEXT: vmovq %xmm1, %rax
 ; AVX512-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
-; AVX512-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX512-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX512-NEXT: vpextrq $1, %xmm0, %rax
 ; AVX512-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
 ; AVX512-NEXT: vmovq %xmm0, %rax
 ; AVX512-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
-; AVX512-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX512-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512-NEXT: retq
 entry:
@@ -6351,26 +6351,26 @@
 ; CHECK-NEXT: movd %xmm0, %eax
 ; CHECK-NEXT: xorps %xmm0, %xmm0
 ; CHECK-NEXT: cvtsi2sd %rax, %xmm0
-; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT: movapd %xmm1, %xmm0
 ; CHECK-NEXT: retq
 ;
 ; AVX1-LABEL: constrained_vector_uitofp_v2f64_v2i32:
 ; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vpextrd $1, %xmm0, %eax
+; AVX1-NEXT: vextractps $1, %xmm0, %eax
 ; AVX1-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
 ; AVX1-NEXT: vmovd %xmm0, %eax
 ; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
-; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX1-NEXT: retq
 ;
 ; AVX512-LABEL: constrained_vector_uitofp_v2f64_v2i32:
 ; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vpextrd $1, %xmm0, %eax
+; AVX512-NEXT: vextractps $1, %xmm0, %eax
 ; AVX512-NEXT: vcvtusi2sd %eax, %xmm1, %xmm1
 ; AVX512-NEXT: vmovd %xmm0, %eax
 ; AVX512-NEXT: vcvtusi2sd %eax, %xmm2, %xmm0
-; AVX512-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX512-NEXT: retq
 entry:
   %result = call <2 x double>
@@ -6395,7 +6395,7 @@
 ;
 ; AVX1-LABEL: constrained_vector_uitofp_v2f32_v2i32:
 ; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vpextrd $1, %xmm0, %eax
+; AVX1-NEXT: vextractps $1, %xmm0, %eax
 ; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
 ; AVX1-NEXT: vmovd %xmm0, %eax
 ; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
@@ -6404,7 +6404,7 @@
 ;
 ; AVX512-LABEL: constrained_vector_uitofp_v2f32_v2i32:
 ; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vpextrd $1, %xmm0, %eax
+; AVX512-NEXT: vextractps $1, %xmm0, %eax
 ; AVX512-NEXT: vcvtusi2ss %eax, %xmm1, %xmm1
 ; AVX512-NEXT: vmovd %xmm0, %eax
 ; AVX512-NEXT: vcvtusi2ss %eax, %xmm2, %xmm0
@@ -6460,7 +6460,7 @@
 ; AVX512-NEXT: vcvtusi2sd %rax, %xmm1, %xmm1
 ; AVX512-NEXT: vmovq %xmm0, %rax
 ; AVX512-NEXT: vcvtusi2sd %rax, %xmm2, %xmm0
-; AVX512-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX512-NEXT: retq
 entry:
   %result = call <2 x double>
@@ -6577,16 +6577,16 @@
 ; CHECK-NEXT: cvtsi2sd %rax, %xmm0
 ; CHECK-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movaps %xmm2, %xmm0
+; CHECK-NEXT: movapd %xmm2, %xmm0
 ; CHECK-NEXT: retq
 ;
 ; AVX1-LABEL: constrained_vector_uitofp_v3f64_v3i32:
 ; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vpextrd $1, %xmm0, %eax
+; AVX1-NEXT: vextractps $1, %xmm0, %eax
 ; AVX1-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
 ; AVX1-NEXT: vmovd %xmm0, %eax
 ; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
-; AVX1-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
 ; AVX1-NEXT: vpextrd $2, %xmm0, %eax
 ; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
@@ -6594,11 +6594,11 @@
 ;
 ; AVX512-LABEL: constrained_vector_uitofp_v3f64_v3i32:
 ; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vpextrd $1, %xmm0, %eax
+; AVX512-NEXT: vextractps $1, %xmm0, %eax
 ; AVX512-NEXT: vcvtusi2sd %eax, %xmm1, %xmm1
 ; AVX512-NEXT: vmovd %xmm0, %eax
 ; AVX512-NEXT: vcvtusi2sd %eax, %xmm2, %xmm2
-; AVX512-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX512-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
 ; AVX512-NEXT: vpextrd $2, %xmm0, %eax
 ; AVX512-NEXT: vcvtusi2sd %eax, %xmm3, %xmm0
 ; AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
@@ -6631,7 +6631,7 @@
 ;
 ; AVX1-LABEL: constrained_vector_uitofp_v3f32_v3i32:
 ; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vpextrd $1, %xmm0, %eax
+; AVX1-NEXT: vextractps $1, %xmm0, %eax
 ; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
 ; AVX1-NEXT: vmovd %xmm0, %eax
 ; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
@@ -6643,7 +6643,7 @@
 ;
 ; AVX512-LABEL: constrained_vector_uitofp_v3f32_v3i32:
 ; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vpextrd $1, %xmm0, %eax
+; AVX512-NEXT: vextractps $1, %xmm0, %eax
 ; AVX512-NEXT: vcvtusi2ss %eax, %xmm1, %xmm1
 ; AVX512-NEXT: vmovd %xmm0, %eax
 ; AVX512-NEXT: vcvtusi2ss %eax, %xmm2, %xmm2
@@ -6715,7 +6715,7 @@
 ; AVX512-NEXT: vcvtusi2sd %rax, %xmm1, %xmm1
 ; AVX512-NEXT: vmovq %xmm0, %rax
 ; AVX512-NEXT: vcvtusi2sd %rax, %xmm2, %xmm2
-; AVX512-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX512-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
 ; AVX512-NEXT: vmovq %xmm0, %rax
 ; AVX512-NEXT: vcvtusi2sd %rax, %xmm3, %xmm0
@@ -6860,7 +6860,7 @@
 ; CHECK-NEXT: movd %xmm1, %eax
 ; CHECK-NEXT: xorps %xmm1, %xmm1
 ; CHECK-NEXT: cvtsi2sd %rax, %xmm1
-; CHECK-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; CHECK-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
 ; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
 ; CHECK-NEXT: movd %xmm1, %eax
 ; CHECK-NEXT: cvtsi2sd %rax, %xmm3
@@ -6868,37 +6868,37 @@
 ; CHECK-NEXT: movd %xmm0, %eax
 ; CHECK-NEXT: xorps %xmm1, %xmm1
 ; CHECK-NEXT: cvtsi2sd %rax, %xmm1
-; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0]
-; CHECK-NEXT: movaps %xmm2, %xmm0
+; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; CHECK-NEXT: movapd %xmm2, %xmm0
 ; CHECK-NEXT: retq
 ;
 ; AVX1-LABEL: constrained_vector_uitofp_v4f64_v4i32:
 ; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vpextrd $3, %xmm0, %eax
+; AVX1-NEXT: vextractps $3, %xmm0, %eax
 ; AVX1-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
-; AVX1-NEXT: vpextrd $2, %xmm0, %eax
+; AVX1-NEXT: vextractps $2, %xmm0, %eax
 ; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
-; AVX1-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
-; AVX1-NEXT: vpextrd $1, %xmm0, %eax
+; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX1-NEXT: vextractps $1, %xmm0, %eax
 ; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
 ; AVX1-NEXT: vmovd %xmm0, %eax
 ; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
-; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
 ; AVX512-LABEL: constrained_vector_uitofp_v4f64_v4i32:
 ; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vpextrd $3, %xmm0, %eax
+; AVX512-NEXT: vextractps $3, %xmm0, %eax
 ; AVX512-NEXT: vcvtusi2sd %eax, %xmm1, %xmm1
-; AVX512-NEXT: vpextrd $2, %xmm0, %eax
+; AVX512-NEXT: vextractps $2, %xmm0, %eax
 ; AVX512-NEXT: vcvtusi2sd %eax, %xmm2, %xmm2
-; AVX512-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
-; AVX512-NEXT: vpextrd $1, %xmm0, %eax
+; AVX512-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX512-NEXT: vextractps $1, %xmm0, %eax
 ; AVX512-NEXT: vcvtusi2sd %eax, %xmm3, %xmm2
 ; AVX512-NEXT: vmovd %xmm0, %eax
 ; AVX512-NEXT: vcvtusi2sd %eax, %xmm3, %xmm0
-; AVX512-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX512-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512-NEXT: retq
 entry:
@@ -6935,7 +6935,7 @@
 ;
 ; AVX1-LABEL: constrained_vector_uitofp_v4f32_v4i32:
 ; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vpextrd $1, %xmm0, %eax
+; AVX1-NEXT: vextractps $1, %xmm0, %eax
 ; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
 ; AVX1-NEXT: vmovd %xmm0, %eax
 ; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
@@ -6950,7 +6950,7 @@
 ;
 ; AVX512-LABEL: constrained_vector_uitofp_v4f32_v4i32:
 ; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vpextrd $1, %xmm0, %eax
+; AVX512-NEXT: vextractps $1, %xmm0, %eax
 ; AVX512-NEXT: vcvtusi2ss %eax, %xmm1, %xmm1
 ; AVX512-NEXT: vmovd %xmm0, %eax
 ; AVX512-NEXT: vcvtusi2ss %eax, %xmm2, %xmm2
@@ -7038,12 +7038,12 @@
 ; AVX512-NEXT: vcvtusi2sd %rax, %xmm2, %xmm2
 ; AVX512-NEXT: vmovq %xmm1, %rax
 ; AVX512-NEXT: vcvtusi2sd %rax, %xmm3, %xmm1
-; AVX512-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX512-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX512-NEXT: vpextrq $1, %xmm0, %rax
 ; AVX512-NEXT: vcvtusi2sd %rax, %xmm3, %xmm2
 ; AVX512-NEXT: vmovq %xmm0, %rax
 ; AVX512-NEXT: vcvtusi2sd %rax, %xmm3, %xmm0
-; AVX512-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX512-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512-NEXT: retq
 entry:
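
For reference, the bias expansion that ExpandLegalINT_TO_FP and the X86 LowerUINT_TO_FP_i32 path keep working above hinges on one bit trick: place the 32-bit value in the low word of a double whose high word is 0x43300000 (the bit pattern of 2^52), then subtract the 2^52 bias. The following standalone C++ sketch models that arithmetic; it is illustrative only, not part of the patch, and the function name is made up:

#include <cassert>
#include <cstdint>
#include <cstring>

// Model of the i32 -> f64 bias expansion: 0x4330000000000000 | X are the
// bits of the double 2^52 + X (exact, since X < 2^52), and subtracting 2^52
// is also exact, leaving precisely (double)X.
static double U32ToF64ViaBias(uint32_t X) {
  uint64_t Bits = 0x4330000000000000ULL | X;
  double Biased;
  std::memcpy(&Biased, &Bits, sizeof(Biased));
  return Biased - 4503599627370496.0; // 2^52, BitsToDouble(0x4330000000000000ULL)
}

int main() {
  assert(U32ToF64ViaBias(0) == 0.0);
  assert(U32ToF64ViaBias(42) == 42.0);
  assert(U32ToF64ViaBias(0xFFFFFFFFu) == 4294967295.0);
  return 0;
}

The signed variant in ExpandLegalINT_TO_FP first XORs the input with 0x80000000 to map it into unsigned space, then uses the bias 2^52 + 2^31 (BitsToDouble(0x4330000080000000ULL)) so the same subtraction also undoes the mapping.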