diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1042,6 +1042,10 @@
     return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op];
   }
 
+  /// Return true if we want to legalize the node \p N as a libcall without
+  /// querying the operation action.
+  virtual bool isLegalizedAsLibCall(SDNode *N) const { return false; }
+
   /// Custom method defined by each target to indicate if an operation which
   /// may require a scale is supported natively by the target.
   /// If not, the operation is illegal.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -977,6 +977,9 @@
          "Unexpected illegal type!");
 #endif
 
+  if (TLI.isLegalizedAsLibCall(Node))
+    return ConvertNodeToLibcall(Node);
+
   // Figure out the correct action; the way to query this varies by opcode
   TargetLowering::LegalizeAction Action = TargetLowering::Legal;
   bool SimpleFinishLegalizing = true;
@@ -4305,11 +4308,45 @@
     Results.push_back(ExpandLibCall(LC, Node, false));
     break;
   }
+  case ISD::STRICT_FP_TO_SINT:
+  case ISD::STRICT_FP_TO_UINT:
+  case ISD::STRICT_SINT_TO_FP:
+  case ISD::STRICT_UINT_TO_FP:
+  case ISD::STRICT_FP_EXTEND:
+  case ISD::STRICT_FP_ROUND:
   case ISD::STRICT_FP_TO_FP16: {
-    RTLIB::Libcall LC =
-        RTLIB::getFPROUND(Node->getOperand(1).getValueType(), MVT::f16);
-    assert(LC != RTLIB::UNKNOWN_LIBCALL &&
-           "Unable to expand strict_fp_to_fp16");
+    RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+    switch (Opc) {
+    default:
+      llvm_unreachable("Unable to legalize as libcall");
+    case ISD::STRICT_FP_TO_FP16:
+      LC = RTLIB::getFPROUND(Node->getOperand(1).getValueType(), MVT::f16);
+      break;
+    case ISD::STRICT_FP_TO_SINT:
+      LC = RTLIB::getFPTOSINT(Node->getOperand(1).getValueType(),
+                              Node->getValueType(0));
+      break;
+    case ISD::STRICT_FP_TO_UINT:
+      LC = RTLIB::getFPTOUINT(Node->getOperand(1).getValueType(),
+                              Node->getValueType(0));
+      break;
+    case ISD::STRICT_SINT_TO_FP:
+      LC = RTLIB::getSINTTOFP(Node->getOperand(1).getValueType(),
+                              Node->getValueType(0));
+      break;
+    case ISD::STRICT_UINT_TO_FP:
+      LC = RTLIB::getUINTTOFP(Node->getOperand(1).getValueType(),
+                              Node->getValueType(0));
+      break;
+    case ISD::STRICT_FP_EXTEND:
+      LC = RTLIB::getFPEXT(Node->getOperand(1).getValueType(),
+                           Node->getValueType(0));
+      break;
+    case ISD::STRICT_FP_ROUND:
+      LC = RTLIB::getFPROUND(Node->getOperand(1).getValueType(),
+                             Node->getValueType(0));
+      break;
+    }
     TargetLowering::MakeLibCallOptions CallOptions;
     std::pair<SDValue, SDValue> Tmp =
         TLI.makeLibCall(DAG, LC, Node->getValueType(0), Node->getOperand(1),
@@ -4318,6 +4355,59 @@
     Results.push_back(Tmp.second);
     break;
   }
+  case ISD::FP_TO_SINT: {
+    Results.push_back(
+        ExpandLibCall(RTLIB::getFPTOSINT(Node->getOperand(0).getValueType(),
+                                         Node->getValueType(0)),
+                      Node, true));
+    break;
+  }
+  case ISD::FP_TO_UINT: {
+    Results.push_back(
+        ExpandLibCall(RTLIB::getFPTOUINT(Node->getOperand(0).getValueType(),
+                                         Node->getValueType(0)),
+                      Node, false));
+    break;
+  }
+  case ISD::SINT_TO_FP: {
+    Results.push_back(
+        ExpandLibCall(RTLIB::getSINTTOFP(Node->getOperand(0).getValueType(),
+                                         Node->getValueType(0)),
+                      Node, true));
+    break;
+  }
+  case ISD::UINT_TO_FP: {
+    Results.push_back(
+        ExpandLibCall(RTLIB::getUINTTOFP(Node->getOperand(0).getValueType(),
+                                         Node->getValueType(0)),
+                      Node, false));
+    break;
+  }
+  case ISD::FP_EXTEND: {
+    Results.push_back(
+        ExpandLibCall(RTLIB::getFPEXT(Node->getOperand(0).getValueType(),
+                                      Node->getValueType(0)),
+                      Node, false));
+    break;
+  }
+  case ISD::FP_ROUND: {
+    // X = FP_ROUND(Y, TRUNC)
+    // TRUNC is a flag, which is always an integer that is zero or one.
+    // If TRUNC is 0, this is a normal rounding; if it is 1, this FP_ROUND is
+    // known not to change the value of Y.
+    // We can only expand it into a libcall if TRUNC is 0.
+    const ConstantSDNode *TRUNC = dyn_cast<ConstantSDNode>(Node->getOperand(1));
+    assert(TRUNC && TRUNC->isNullValue() &&
+           "Unable to expand as libcall if it is not normal rounding");
+    TargetLowering::MakeLibCallOptions CallOptions;
+    std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(
+        DAG,
+        RTLIB::getFPROUND(Node->getOperand(0).getValueType(),
+                          Node->getValueType(0)),
+        Node->getValueType(0), Node->getOperand(0), CallOptions, SDLoc(Node));
+    Results.push_back(Tmp.first);
+    break;
+  }
   case ISD::FSUB:
   case ISD::STRICT_FSUB:
     ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -530,6 +530,8 @@
   bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                              EVT NewVT) const override;
 
+  bool isLegalizedAsLibCall(SDNode *N) const override;
+
   bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
   bool isTruncateFree(EVT VT1, EVT VT2) const override;
 
@@ -902,8 +904,6 @@
   SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG,
-                        RTLIB::Libcall Call) const;
   SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -368,12 +368,12 @@
   // Virtually no operation on f128 is legal, but LLVM can't expand them when
   // there's a valid register class, so we need custom operations in most cases.
   setOperationAction(ISD::FABS, MVT::f128, Expand);
-  setOperationAction(ISD::FADD, MVT::f128, Custom);
+  setOperationAction(ISD::FADD, MVT::f128, LibCall);
   setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
   setOperationAction(ISD::FCOS, MVT::f128, Expand);
-  setOperationAction(ISD::FDIV, MVT::f128, Custom);
+  setOperationAction(ISD::FDIV, MVT::f128, LibCall);
   setOperationAction(ISD::FMA, MVT::f128, Expand);
-  setOperationAction(ISD::FMUL, MVT::f128, Custom);
+  setOperationAction(ISD::FMUL, MVT::f128, LibCall);
   setOperationAction(ISD::FNEG, MVT::f128, Expand);
   setOperationAction(ISD::FPOW, MVT::f128, Expand);
   setOperationAction(ISD::FREM, MVT::f128, Expand);
@@ -381,7 +381,7 @@
   setOperationAction(ISD::FSIN, MVT::f128, Expand);
   setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
   setOperationAction(ISD::FSQRT, MVT::f128, Expand);
-  setOperationAction(ISD::FSUB, MVT::f128, Custom);
+  setOperationAction(ISD::FSUB, MVT::f128, LibCall);
   setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
   setOperationAction(ISD::SETCC, MVT::f128, Custom);
   setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom);
@@ -389,7 +389,6 @@
   setOperationAction(ISD::BR_CC, MVT::f128, Custom);
   setOperationAction(ISD::SELECT, MVT::f128, Custom);
   setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
-  setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
 
   // Lowering for many of the conversions is actually specified by the non-f128
   // type. The LowerXXX function will be trivial when f128 isn't involved.
@@ -1160,6 +1159,27 @@
   PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
 }
 
+bool AArch64TargetLowering::isLegalizedAsLibCall(SDNode *N) const {
+  switch (N->getOpcode()) {
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:
+  case ISD::STRICT_SINT_TO_FP:
+  case ISD::STRICT_UINT_TO_FP:
+  case ISD::STRICT_FP_EXTEND:
+  case ISD::FP_EXTEND:
+    return N->getValueType(0) == MVT::f128;
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+  case ISD::FP_ROUND:
+    return N->getOperand(0).getValueType() == MVT::f128;
+  case ISD::STRICT_FP_TO_SINT:
+  case ISD::STRICT_FP_TO_UINT:
+  case ISD::STRICT_FP_ROUND:
+    return N->getOperand(1).getValueType() == MVT::f128;
+  }
+  return false;
+}
+
 void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
   assert(VT.isVector() && "VT should be a vector type");
 
@@ -2775,20 +2795,6 @@
   return std::make_pair(Value, Overflow);
 }
 
-SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
-                                             RTLIB::Libcall Call) const {
-  bool IsStrict = Op->isStrictFPOpcode();
-  unsigned Offset = IsStrict ? 1 : 0;
-  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
-  SmallVector<SDValue, 2> Ops(Op->op_begin() + Offset, Op->op_end());
-  MakeLibCallOptions CallOptions;
-  SDValue Result;
-  SDLoc dl(Op);
-  std::tie(Result, Chain) = makeLibCall(DAG, Call, Op.getValueType(), Ops,
-                                        CallOptions, dl, Chain);
-  return IsStrict ? DAG.getMergeValues({Result, Chain}, dl) : Result;
-}
-
 SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
   if (useSVEForFixedLengthVectorVT(Op.getValueType()))
     return LowerToScalableOp(Op, DAG);
@@ -2970,12 +2976,8 @@
   if (Op.getValueType().isScalableVector())
     return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU);
 
-  assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
-
-  RTLIB::Libcall LC;
-  LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
-
-  return LowerF128Call(Op, DAG, LC);
+  llvm_unreachable("Unexpected lowering");
+  return SDValue();
 }
 
 SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
@@ -2987,28 +2989,11 @@
   SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
   EVT SrcVT = SrcVal.getValueType();
 
-  if (SrcVT != MVT::f128) {
-    // Expand cases where the input is a vector bigger than NEON.
-    if (useSVEForFixedLengthVectorVT(SrcVT))
-      return SDValue();
-
-    // It's legal except when f128 is involved
-    return Op;
-  }
-
-  RTLIB::Libcall LC;
-  LC = RTLIB::getFPROUND(SrcVT, Op.getValueType());
+  // Expand cases where the input is a vector bigger than NEON.
+  if (useSVEForFixedLengthVectorVT(SrcVT))
+    return SDValue();
 
-  // FP_ROUND node has a second operand indicating whether it is known to be
-  // precise. That doesn't take part in the LibCall so we can't directly use
-  // LowerF128Call.
-  MakeLibCallOptions CallOptions;
-  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
-  SDValue Result;
-  SDLoc dl(Op);
-  std::tie(Result, Chain) = makeLibCall(DAG, LC, Op.getValueType(), SrcVal,
-                                        CallOptions, dl, Chain);
-  return IsStrict ? DAG.getMergeValues({Result, Chain}, dl) : Result;
+  return Op;
 }
 
 SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
@@ -3078,19 +3063,7 @@
         DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal));
   }
 
-  if (SrcVal.getValueType() != MVT::f128) {
-    // It's legal except when f128 is involved
-    return Op;
-  }
-
-  RTLIB::Libcall LC;
-  if (Op.getOpcode() == ISD::FP_TO_SINT ||
-      Op.getOpcode() == ISD::STRICT_FP_TO_SINT)
-    LC = RTLIB::getFPTOSINT(SrcVal.getValueType(), Op.getValueType());
-  else
-    LC = RTLIB::getFPTOUINT(SrcVal.getValueType(), Op.getValueType());
-
-  return LowerF128Call(Op, DAG, LC);
+  return Op;
 }
 
 SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
@@ -3154,19 +3127,7 @@
   if (SrcVal.getValueType() == MVT::i128)
     return SDValue();
 
-  // Other conversions are legal, unless it's to the completely software-based
-  // fp128.
-  if (Op.getValueType() != MVT::f128)
-    return Op;
-
-  RTLIB::Libcall LC;
-  if (Op.getOpcode() == ISD::SINT_TO_FP ||
-      Op.getOpcode() == ISD::STRICT_SINT_TO_FP)
-    LC = RTLIB::getSINTTOFP(SrcVal.getValueType(), Op.getValueType());
-  else
-    LC = RTLIB::getUINTTOFP(SrcVal.getValueType(), Op.getValueType());
-
-  return LowerF128Call(Op, DAG, LC);
+  return Op;
 }
 
 SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
                                             SelectionDAG &DAG) const {
@@ -3967,22 +3928,14 @@
   case ISD::UMULO:
     return LowerXALUO(Op, DAG);
   case ISD::FADD:
-    if (Op.getValueType() == MVT::f128)
-      return LowerF128Call(Op, DAG, RTLIB::ADD_F128);
     return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_PRED);
   case ISD::FSUB:
-    if (Op.getValueType() == MVT::f128)
-      return LowerF128Call(Op, DAG, RTLIB::SUB_F128);
     return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSUB_PRED);
   case ISD::FMUL:
-    if (Op.getValueType() == MVT::f128)
-      return LowerF128Call(Op, DAG, RTLIB::MUL_F128);
     return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMUL_PRED);
   case ISD::FMA:
    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMA_PRED);
   case ISD::FDIV:
-    if (Op.getValueType() == MVT::f128)
-      return LowerF128Call(Op, DAG, RTLIB::DIV_F128);
     return LowerToPredicatedOp(Op, DAG, AArch64ISD::FDIV_PRED);
   case ISD::FNEG:
     return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -591,6 +591,9 @@
   setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
   setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
 
+  setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
+  setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
+
   if (Subtarget.has64BitSupport()) {
     // They also have instructions for converting between i64 and fp.
     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1021,6 +1021,8 @@
     bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
 
+    bool isLegalizedAsLibCall(SDNode *N) const override;
+
     bool shouldTransformSignedTruncationCheck(EVT XVT,
                                               unsigned KeptBits) const override {
@@ -1529,10 +1531,6 @@
     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
     SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
-
-    SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG,
-                          RTLIB::Libcall Call) const;
 
     SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                                  bool isVarArg,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -717,22 +717,20 @@
     setOperationAction(ISD::FSQRT, MVT::f128, LibCall);
     setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall);
 
-    setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
-    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Custom);
-
-    // We need to custom handle any FP_ROUND with an f128 input, but
-    // LegalizeDAG uses the result type to know when to run a custom handler.
-    // So we have to list all legal floating point result types here.
+    setOperationAction(ISD::FP_EXTEND, MVT::f128, LibCall);
+    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, LibCall);
+
     if (isTypeLegal(MVT::f32)) {
-      setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
-      setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
+      setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
+      setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
     }
     if (isTypeLegal(MVT::f64)) {
-      setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
-      setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
+      setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
+      setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
     }
     if (isTypeLegal(MVT::f80)) {
-      setOperationAction(ISD::FP_ROUND, MVT::f80, Custom);
-      setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom);
+      setOperationAction(ISD::FP_ROUND, MVT::f80, Legal);
+      setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Legal);
     }
 
     setOperationAction(ISD::SETCC, MVT::f128, Custom);
 
@@ -2040,6 +2038,39 @@
   IsStrictFPEnabled = true;
 }
 
+bool X86TargetLowering::isLegalizedAsLibCall(SDNode *N) const {
+  unsigned SrcIdx = N->isStrictFPOpcode() ? 1 : 0;
+  EVT DestVT = N->getValueType(0);
+
+  switch (N->getOpcode()) {
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:
+  case ISD::STRICT_SINT_TO_FP:
+  case ISD::STRICT_UINT_TO_FP: {
+    // Don't legalize it as a libcall if the SrcVT can be promoted.
+    EVT SrcVT = N->getOperand(SrcIdx).getValueType();
+    return DestVT == MVT::f128 && SrcVT != MVT::i16 &&
+           getOperationAction(N->getOpcode(), SrcVT) != Promote;
+  }
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+  case ISD::STRICT_FP_TO_SINT:
+  case ISD::STRICT_FP_TO_UINT: {
+    // Don't legalize it as a libcall if the DestVT can be promoted.
+    EVT SrcVT = N->getOperand(SrcIdx).getValueType();
+    return SrcVT == MVT::f128 && DestVT != MVT::i16 &&
+           getOperationAction(N->getOpcode(), DestVT) != Promote;
+  }
+  case ISD::FP_EXTEND:
+  case ISD::STRICT_FP_EXTEND:
+    return DestVT == MVT::f128;
+  case ISD::FP_ROUND:
+  case ISD::STRICT_FP_ROUND:
+    return N->getOperand(SrcIdx).getValueType() == MVT::f128;
+  }
+  return false;
+}
+
 // This has so far only been implemented for 64-bit MachO.
 bool X86TargetLowering::useLoadStackGuardNode() const {
   return Subtarget.isTargetMachO() && Subtarget.is64Bit();
@@ -19788,9 +19819,6 @@
     return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Ext);
   }
 
-  if (VT == MVT::f128)
-    return LowerF128Call(Op, DAG, RTLIB::getSINTTOFP(SrcVT, VT));
-
   SDValue ValueToStore = Src;
   if (SrcVT == MVT::i64 && Subtarget.hasSSE2() && !Subtarget.is64Bit())
     // Bitcasting to f64 here allows us to do a single 64-bit store from
@@ -20229,9 +20257,6 @@
   MVT DstVT = Op->getSimpleValueType(0);
   SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
 
-  if (DstVT == MVT::f128)
-    return LowerF128Call(Op, DAG, RTLIB::getUINTTOFP(SrcVT, DstVT));
-
   if (DstVT.isVector())
     return lowerUINT_TO_FP_vec(Op, DAG, Subtarget);
 
@@ -21216,25 +21241,6 @@
   if (UseSSEReg && IsSigned)
     return Op;
 
-  // fp128 needs to use a libcall.
-  if (SrcVT == MVT::f128) {
-    RTLIB::Libcall LC;
-    if (IsSigned)
-      LC = RTLIB::getFPTOSINT(SrcVT, VT);
-    else
-      LC = RTLIB::getFPTOUINT(SrcVT, VT);
-
-    SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
-    MakeLibCallOptions CallOptions;
-    std::pair<SDValue, SDValue> Tmp = makeLibCall(DAG, LC, VT, Src, CallOptions,
-                                                  SDLoc(Op), Chain);
-
-    if (IsStrict)
-      return DAG.getMergeValues({ Tmp.first, Tmp.second }, dl);
-
-    return Tmp.first;
-  }
-
   // Fall back to X87.
   SDValue Chain;
   if (SDValue V = FP_TO_INTHelper(Op, DAG, IsSigned, Chain)) {
@@ -21310,11 +21316,6 @@
   SDValue In = Op.getOperand(IsStrict ? 1 : 0);
   MVT SVT = In.getSimpleValueType();
 
-  if (VT == MVT::f128) {
-    RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, VT);
-    return LowerF128Call(Op, DAG, LC);
-  }
-
   assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");
 
   SDValue Res =
@@ -21325,35 +21326,6 @@
   return DAG.getNode(X86ISD::VFPEXT, DL, VT, Res);
 }
 
-SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
-  bool IsStrict = Op->isStrictFPOpcode();
-
-  MVT VT = Op.getSimpleValueType();
-  SDValue In = Op.getOperand(IsStrict ? 1 : 0);
-  MVT SVT = In.getSimpleValueType();
-
-  // It's legal except when f128 is involved
-  if (SVT != MVT::f128)
-    return Op;
-
-  RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, VT);
-
-  // FP_ROUND node has a second operand indicating whether it is known to be
-  // precise. That doesn't take part in the LibCall so we can't directly use
-  // LowerF128Call.
-
-  SDLoc dl(Op);
-  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
-  MakeLibCallOptions CallOptions;
-  std::pair<SDValue, SDValue> Tmp = makeLibCall(DAG, LC, VT, In, CallOptions,
-                                                dl, Chain);
-
-  if (IsStrict)
-    return DAG.getMergeValues({ Tmp.first, Tmp.second }, dl);
-
-  return Tmp.first;
-}
-
 static SDValue LowerFP16_TO_FP(SDValue Op, SelectionDAG &DAG) {
   bool IsStrict = Op->isStrictFPOpcode();
   SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
@@ -29656,25 +29628,6 @@
   return NOOP;
 }
 
-SDValue X86TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
-                                         RTLIB::Libcall Call) const {
-
-  bool IsStrict = Op->isStrictFPOpcode();
-  unsigned Offset = IsStrict ? 1 : 0;
-  SmallVector<SDValue, 2> Ops(Op->op_begin() + Offset, Op->op_end());
-
-  SDLoc dl(Op);
-  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
-  MakeLibCallOptions CallOptions;
-  std::pair<SDValue, SDValue> Tmp = makeLibCall(DAG, Call, MVT::f128, Ops,
-                                                CallOptions, dl, Chain);
-
-  if (IsStrict)
-    return DAG.getMergeValues({ Tmp.first, Tmp.second }, dl);
-
-  return Tmp.first;
-}
-
 // Custom split CVTPS2PH with wide types.
 static SDValue LowerCVTPS2PH(SDValue Op, SelectionDAG &DAG) {
   SDLoc dl(Op);
@@ -29741,8 +29694,6 @@
   case ISD::STRICT_FP_TO_UINT:  return LowerFP_TO_INT(Op, DAG);
   case ISD::FP_EXTEND:
   case ISD::STRICT_FP_EXTEND:   return LowerFP_EXTEND(Op, DAG);
-  case ISD::FP_ROUND:
-  case ISD::STRICT_FP_ROUND:    return LowerFP_ROUND(Op, DAG);
   case ISD::FP16_TO_FP:
   case ISD::STRICT_FP16_TO_FP:  return LowerFP16_TO_FP(Op, DAG);
   case ISD::FP_TO_FP16:
diff --git a/llvm/test/CodeGen/AArch64/arm64-fp128.ll b/llvm/test/CodeGen/AArch64/arm64-fp128.ll
--- a/llvm/test/CodeGen/AArch64/arm64-fp128.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fp128.ll
@@ -7,16 +7,11 @@
 define fp128 @test_add() {
 ; CHECK-LABEL: test_add:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w30, -16
 ; CHECK-NEXT: adrp x8, lhs
 ; CHECK-NEXT: ldr q0, [x8, :lo12:lhs]
 ; CHECK-NEXT: adrp x8, rhs
 ; CHECK-NEXT: ldr q1, [x8, :lo12:rhs]
-; CHECK-NEXT: bl __addtf3
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
+; CHECK-NEXT: b __addtf3
 
   %lhs = load fp128, fp128* @lhs, align 16
   %rhs = load fp128, fp128* @rhs, align 16
@@ -28,16 +23,11 @@
 define fp128 @test_sub() {
 ; CHECK-LABEL: test_sub:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w30, -16
 ; CHECK-NEXT: adrp x8, lhs
 ; CHECK-NEXT: ldr q0, [x8, :lo12:lhs]
 ; CHECK-NEXT: adrp x8, rhs
 ; CHECK-NEXT: ldr q1, [x8, :lo12:rhs]
-; CHECK-NEXT: bl __subtf3
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
+; CHECK-NEXT: b __subtf3
 
   %lhs = load fp128, fp128* @lhs, align 16
   %rhs = load fp128, fp128* @rhs, align 16
@@ -49,16 +39,11 @@
 define fp128 @test_mul() {
 ; CHECK-LABEL: test_mul:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w30, -16
 ; CHECK-NEXT: adrp x8, lhs
 ; CHECK-NEXT: ldr q0, [x8, :lo12:lhs]
 ; CHECK-NEXT: adrp x8, rhs
 ; CHECK-NEXT: ldr q1, [x8, :lo12:rhs]
-; CHECK-NEXT: bl __multf3
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
+; CHECK-NEXT: b __multf3
 
   %lhs = load fp128, fp128* @lhs, align 16
   %rhs = load fp128, fp128* @rhs, align 16
@@ -70,16 +55,11 @@
 define fp128 @test_div() {
 ; CHECK-LABEL: test_div:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w30, -16
 ; CHECK-NEXT: adrp x8, lhs
 ; CHECK-NEXT: ldr q0, [x8, :lo12:lhs]
 ; CHECK-NEXT: adrp x8, rhs
 ; CHECK-NEXT: ldr q1, [x8, :lo12:rhs]
-; CHECK-NEXT: bl __divtf3
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
+; CHECK-NEXT: b __divtf3
 
   %lhs = load fp128, fp128* @lhs, align 16
   %rhs = load fp128, fp128* @rhs, align 16
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll
@@ -73,13 +73,10 @@
 define fp128 @test_v1f128(<1 x fp128> %a, fp128 %s) nounwind {
 ; CHECK-LABEL: test_v1f128:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT: mov v2.16b, v0.16b
 ; CHECK-NEXT: mov v0.16b, v1.16b
 ; CHECK-NEXT: mov v1.16b, v2.16b
-; CHECK-NEXT: bl __addtf3
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
+; CHECK-NEXT: b __addtf3
   %b = call fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128 %s, <1 x fp128> %a)
   ret fp128 %b
 }
@@ -151,10 +148,9 @@
 ; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-NEXT: bl __addtf3
 ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: bl __addtf3
 ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-NEXT: add sp, sp, #32 // =32
-; CHECK-NEXT: ret
+; CHECK-NEXT: b __addtf3
   %b = call fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 %s, <2 x fp128> %a)
   ret fp128 %b
 }
@@ -162,10 +158,7 @@
 define fp128 @test_v2f128_neutral(<2 x fp128> %a) nounwind {
 ; CHECK-LABEL: test_v2f128_neutral:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: bl __addtf3
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
+; CHECK-NEXT: b __addtf3
   %b = call fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 0xL00000000000000008000000000000000, <2 x fp128> %a)
   ret fp128 %b
 }
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll
@@ -84,10 +84,7 @@
 define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
 ; CHECK-LABEL: test_v2f128:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: bl __addtf3
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
+; CHECK-NEXT: b __addtf3
   %b = call reassoc fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 0xL00000000000000008000000000000000, <2 x fp128> %a)
   ret fp128 %b
 }
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmul-legalization-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmul-legalization-strict.ll
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmul-legalization-strict.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmul-legalization-strict.ll
@@ -59,10 +59,7 @@
 define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
 ; CHECK-LABEL: test_v2f128:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: bl __multf3
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
+; CHECK-NEXT: b __multf3
   %b = call fp128 @llvm.vector.reduce.fmul.f128.v2f128(fp128 0xL00000000000000003fff00000000000000, <2 x fp128> %a)
   ret fp128 %b
 }
diff --git a/llvm/test/CodeGen/X86/fp128-load.ll b/llvm/test/CodeGen/X86/fp128-load.ll
--- a/llvm/test/CodeGen/X86/fp128-load.ll
+++ b/llvm/test/CodeGen/X86/fp128-load.ll
@@ -22,14 +22,9 @@
 define fp128 @TestLoadExtend(fp128 %x, i32 %n) {
 ; CHECK-LABEL: TestLoadExtend:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: movslq %edi, %rax
 ; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: callq __extendsftf2
-; CHECK-NEXT: popq %rax
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: retq
+; CHECK-NEXT: jmp __extendsftf2 # TAILCALL
 entry:
   %idxprom = sext i32 %n to i64
   %arrayidx = getelementptr inbounds [2 x float], [2 x float]* @TestLoadExtend.data, i64 0, i64 %idxprom
diff --git a/llvm/test/CodeGen/X86/scalar-fp-to-i32.ll b/llvm/test/CodeGen/X86/scalar-fp-to-i32.ll
--- a/llvm/test/CodeGen/X86/scalar-fp-to-i32.ll
+++ b/llvm/test/CodeGen/X86/scalar-fp-to-i32.ll
@@ -714,19 +714,9 @@
 ; X86-AVX512-LIN-NEXT: addl $28, %esp
 ; X86-AVX512-LIN-NEXT: retl
 ;
-; X64-AVX512-WIN-LABEL: t_to_u32:
-; X64-AVX512-WIN: # %bb.0:
-; X64-AVX512-WIN-NEXT: subq $40, %rsp
-; X64-AVX512-WIN-NEXT: callq __fixunstfsi
-; X64-AVX512-WIN-NEXT: addq $40, %rsp
-; X64-AVX512-WIN-NEXT: retq
-;
-; X64-AVX512-LIN-LABEL: t_to_u32:
-; X64-AVX512-LIN: # %bb.0:
-; X64-AVX512-LIN-NEXT: pushq %rax
-; X64-AVX512-LIN-NEXT: callq __fixunstfsi
-; X64-AVX512-LIN-NEXT: popq %rcx
-; X64-AVX512-LIN-NEXT: retq
+; X64-AVX512-LABEL: t_to_u32:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: jmp __fixunstfsi # TAILCALL
 ;
 ; X86-SSE-WIN-LABEL: t_to_u32:
 ; X86-SSE-WIN: # %bb.0:
@@ -749,19 +739,9 @@
 ; X86-SSE-LIN-NEXT: addl $28, %esp
 ; X86-SSE-LIN-NEXT: retl
 ;
-; X64-SSE-WIN-LABEL: t_to_u32:
-; X64-SSE-WIN: # %bb.0:
-; X64-SSE-WIN-NEXT: subq $40, %rsp
-; X64-SSE-WIN-NEXT: callq __fixunstfsi
-; X64-SSE-WIN-NEXT: addq $40, %rsp
-; X64-SSE-WIN-NEXT: retq
-;
-; X64-SSE-LIN-LABEL: t_to_u32:
-; X64-SSE-LIN: # %bb.0:
-; X64-SSE-LIN-NEXT: pushq %rax
-; X64-SSE-LIN-NEXT: callq __fixunstfsi
-; X64-SSE-LIN-NEXT: popq %rcx
-; X64-SSE-LIN-NEXT: retq
+; X64-SSE-LABEL: t_to_u32:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: jmp __fixunstfsi # TAILCALL
 ;
 ; X87-WIN-LABEL: t_to_u32:
 ; X87-WIN: # %bb.0:
@@ -806,19 +786,9 @@
 ; X86-AVX512-LIN-NEXT: addl $28, %esp
 ; X86-AVX512-LIN-NEXT: retl
 ;
-; X64-AVX512-WIN-LABEL: t_to_s32:
-; X64-AVX512-WIN: # %bb.0:
-; X64-AVX512-WIN-NEXT: subq $40, %rsp
-; X64-AVX512-WIN-NEXT: callq __fixtfsi
-; X64-AVX512-WIN-NEXT: addq $40, %rsp
-; X64-AVX512-WIN-NEXT: retq
-;
-; X64-AVX512-LIN-LABEL: t_to_s32:
-; X64-AVX512-LIN: # %bb.0:
-; X64-AVX512-LIN-NEXT: pushq %rax
-; X64-AVX512-LIN-NEXT: callq __fixtfsi
-; X64-AVX512-LIN-NEXT: popq %rcx
-; X64-AVX512-LIN-NEXT: retq
+; X64-AVX512-LABEL: t_to_s32:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: jmp __fixtfsi # TAILCALL
 ;
 ; X86-SSE-WIN-LABEL: t_to_s32:
 ; X86-SSE-WIN: # %bb.0:
@@ -841,19 +811,9 @@
 ; X86-SSE-LIN-NEXT: addl $28, %esp
 ; X86-SSE-LIN-NEXT: retl
 ;
-; X64-SSE-WIN-LABEL: t_to_s32:
-; X64-SSE-WIN: # %bb.0:
-; X64-SSE-WIN-NEXT: subq $40, %rsp
-; X64-SSE-WIN-NEXT: callq __fixtfsi
-; X64-SSE-WIN-NEXT: addq $40, %rsp
-; X64-SSE-WIN-NEXT: retq
-;
-; X64-SSE-LIN-LABEL: t_to_s32:
-; X64-SSE-LIN: # %bb.0:
-; X64-SSE-LIN-NEXT: pushq %rax
-; X64-SSE-LIN-NEXT: callq __fixtfsi
-; X64-SSE-LIN-NEXT: popq %rcx
-; X64-SSE-LIN-NEXT: retq
+; X64-SSE-LABEL: t_to_s32:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: jmp __fixtfsi # TAILCALL
 ;
 ; X87-WIN-LABEL: t_to_s32:
 ; X87-WIN: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll b/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll
--- a/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll
+++ b/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll
@@ -1,15 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=i386-pc-windows-msvc -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86,X86-AVX512,X86-AVX512DQVL,X86-AVX512-WIN,X86-AVX512DQVL-WIN
 ; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86,X86-AVX512,X86-AVX512DQVL,X86-AVX512-LIN,X86-AVX512DQVL-LIN
-; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,X86-AVX512,X64-AVX512DQVL,X64-AVX512-WIN,X64-AVX512DQVL-WIN
+; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,X64-AVX512,X64-AVX512DQVL,X64-AVX512-WIN,X64-AVX512DQVL-WIN
 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,X64-AVX512,X64-AVX512DQVL,X64-AVX512-LIN,X64-AVX512DQVL-LIN
 ; RUN: llc < %s -mtriple=i386-pc-windows-msvc -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,X86,X86-AVX512,X86-AVX512DQ,X86-AVX512-WIN,X86-AVX512DQ-WIN
 ; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,X86,X86-AVX512,X86-AVX512DQ,X86-AVX512-LIN,X86-AVX512DQ-LIN
-; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,X64,X86-AVX512,X64-AVX512DQ,X64-AVX512-WIN,X64-AVX512DQ-WIN
+; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,X64,X64-AVX512,X64-AVX512DQ,X64-AVX512-WIN,X64-AVX512DQ-WIN
 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,X64,X64-AVX512,X64-AVX512DQ,X64-AVX512-LIN,X64-AVX512DQ-LIN
 ; RUN: llc < %s -mtriple=i386-pc-windows-msvc -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X86,X86-AVX512,X86-AVX512F,X86-AVX512-WIN,X86-AVX512F-WIN
 ; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X86,X86-AVX512,X86-AVX512F,X86-AVX512-LIN,X86-AVX512F-LIN
-; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X64,X86-AVX512,X64-AVX512F,X64-AVX512-WIN,X64-AVX512F-WIN
+; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X64,X64-AVX512,X64-AVX512F,X64-AVX512-WIN,X64-AVX512F-WIN
 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X64,X64-AVX512,X64-AVX512F,X64-AVX512-LIN,X64-AVX512F-LIN
 ; RUN: llc < %s -mtriple=i386-pc-windows-msvc -mattr=+sse3 | FileCheck %s --check-prefixes=CHECK,X86,X86-SSE,X86-SSE3,X86-SSE-WIN,X86-SSE3-WIN
 ; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+sse3 | FileCheck %s --check-prefixes=CHECK,X86,X86-SSE,X86-SSE3,X86-SSE-LIN,X86-SSE3-LIN
@@ -1415,19 +1415,13 @@
 ; X86-AVX512-LIN-NEXT: addl $28, %esp
 ; X86-AVX512-LIN-NEXT: retl
 ;
-; X64-AVX512-WIN-LABEL: t_to_u64:
-; X64-AVX512-WIN: # %bb.0:
-; X64-AVX512-WIN-NEXT: subq $40, %rsp
-; X64-AVX512-WIN-NEXT: callq __fixunstfdi
-; X64-AVX512-WIN-NEXT: addq $40, %rsp
-; X64-AVX512-WIN-NEXT: retq
+; X64-LABEL: t_to_u64:
+; X64: # %bb.0:
+; X64-NEXT: jmp __fixunstfdi # TAILCALL
 ;
-; X64-AVX512-LIN-LABEL: t_to_u64:
-; X64-AVX512-LIN: # %bb.0:
-; X64-AVX512-LIN-NEXT: pushq %rax
-; X64-AVX512-LIN-NEXT: callq __fixunstfdi
-; X64-AVX512-LIN-NEXT: popq %rcx
-; X64-AVX512-LIN-NEXT: retq
+; X64-AVX512-LABEL: t_to_u64:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: jmp __fixunstfdi # TAILCALL
 ;
 ; X86-SSE-WIN-LABEL: t_to_u64:
 ; X86-SSE-WIN: # %bb.0:
@@ -1450,20 +1444,6 @@
 ; X86-SSE-LIN-NEXT: addl $28, %esp
 ; X86-SSE-LIN-NEXT: retl
 ;
-; X64-SSE-WIN-LABEL: t_to_u64:
-; X64-SSE-WIN: # %bb.0:
-; X64-SSE-WIN-NEXT: subq $40, %rsp
-; X64-SSE-WIN-NEXT: callq __fixunstfdi
-; X64-SSE-WIN-NEXT: addq $40, %rsp
-; X64-SSE-WIN-NEXT: retq
-;
-; X64-SSE-LIN-LABEL: t_to_u64:
-; X64-SSE-LIN: # %bb.0:
-; X64-SSE-LIN-NEXT: pushq %rax
-; X64-SSE-LIN-NEXT: callq __fixunstfdi
-; X64-SSE-LIN-NEXT: popq %rcx
-; X64-SSE-LIN-NEXT: retq
-;
 ; X87-WIN-LABEL: t_to_u64:
 ; X87-WIN: # %bb.0:
 ; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp)
@@ -1507,19 +1487,13 @@
 ; X86-AVX512-LIN-NEXT: addl $28, %esp
 ; X86-AVX512-LIN-NEXT: retl
 ;
-; X64-AVX512-WIN-LABEL: t_to_s64:
-; X64-AVX512-WIN: # %bb.0:
-; X64-AVX512-WIN-NEXT: subq $40, %rsp
-; X64-AVX512-WIN-NEXT: callq __fixtfdi
-; X64-AVX512-WIN-NEXT: addq $40, %rsp
-; X64-AVX512-WIN-NEXT: retq
+; X64-LABEL: t_to_s64:
+; X64: # %bb.0:
+; X64-NEXT: jmp __fixtfdi # TAILCALL
 ;
-; X64-AVX512-LIN-LABEL: t_to_s64:
-; X64-AVX512-LIN: # %bb.0:
-; X64-AVX512-LIN-NEXT: pushq %rax
-; X64-AVX512-LIN-NEXT: callq __fixtfdi
-; X64-AVX512-LIN-NEXT: popq %rcx
-; X64-AVX512-LIN-NEXT: retq
+; X64-AVX512-LABEL: t_to_s64:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: jmp __fixtfdi # TAILCALL
 ;
 ; X86-SSE-WIN-LABEL: t_to_s64:
 ; X86-SSE-WIN: # %bb.0:
@@ -1542,20 +1516,6 @@
 ; X86-SSE-LIN-NEXT: addl $28, %esp
 ; X86-SSE-LIN-NEXT: retl
 ;
-; X64-SSE-WIN-LABEL: t_to_s64:
-; X64-SSE-WIN: # %bb.0:
-; X64-SSE-WIN-NEXT: subq $40, %rsp
-; X64-SSE-WIN-NEXT: callq __fixtfdi
-; X64-SSE-WIN-NEXT: addq $40, %rsp
-; X64-SSE-WIN-NEXT: retq
-;
-; X64-SSE-LIN-LABEL: t_to_s64:
-; X64-SSE-LIN: # %bb.0:
-; X64-SSE-LIN-NEXT: pushq %rax
-; X64-SSE-LIN-NEXT: callq __fixtfdi
-; X64-SSE-LIN-NEXT: popq %rcx
-; X64-SSE-LIN-NEXT: retq
-;
 ; X87-WIN-LABEL: t_to_s64:
 ; X87-WIN: # %bb.0:
 ; X87-WIN-NEXT: pushl {{[0-9]+}}(%esp)
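
Note: the new hook sends a node straight to ConvertNodeToLibcall before the operation-action tables are consulted, which is what allows the libcall to become the entire lowering and the tests above to emit tail calls instead of call/ret pairs. As a rough sketch of how another target could opt in, under the assumption that only the TargetLowering hook declared above is available (MyTargetLowering is a hypothetical name; the AArch64 and X86 overrides in this patch are the authoritative in-tree examples):

  bool MyTargetLowering::isLegalizedAsLibCall(SDNode *N) const {
    // For strict FP nodes, operand 0 is the chain, so the FP source value
    // lives at operand 1; for non-strict nodes it is operand 0.
    unsigned SrcIdx = N->isStrictFPOpcode() ? 1 : 0;
    switch (N->getOpcode()) {
    case ISD::FP_EXTEND:
    case ISD::STRICT_FP_EXTEND:
      // Extensions producing f128 have no native instruction here; route
      // them directly to the __extend*tf2 libcalls.
      return N->getValueType(0) == MVT::f128;
    case ISD::FP_ROUND:
    case ISD::STRICT_FP_ROUND:
      // Truncations consuming f128 likewise become __trunctf*2 libcalls.
      return N->getOperand(SrcIdx).getValueType() == MVT::f128;
    default:
      return false;
    }
  }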