diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -3629,7 +3629,8 @@ EVT RetVT, ArrayRef<SDValue> Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, - SDValue Chain = SDValue()) const; + SDValue Chain = SDValue(), + bool IsTailCall = false) const; /// Check whether parameters to a call that are passed in callee saved registers are the same as from the calling function. This needs to be
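The new parameter is defaulted, so every existing makeLibCall caller keeps compiling unchanged; a caller opts in only after it has proven the call sits in tail position. A minimal sketch of the intended calling pattern, assuming the usual lowering-context variables (TLI, DAG, N, LC, Ops, Chain); isInTailCallPosition is the existing TargetLowering helper this patch also uses below:

// Sketch only: how a lowering routine can request a tail call.
bool IsTailCall = TLI.isInTailCallPosition(DAG, N, Chain);
std::pair<SDValue, SDValue> Res =
    TLI.makeLibCall(DAG, LC, RetVT, Ops, CallOptions, DL, Chain, IsTailCall);
if (!Res.second.getNode()) {
  // Emitted as a terminating tail call: both SDValues are null and the
  // DAG root now points at the call, so there is no result to propagate.
}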
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -168,9 +168,8 @@ TargetLowering::MakeLibCallOptions CallOptions; EVT OpVT = N->getOperand(0 + Offset).getValueType(); CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); - std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op, - CallOptions, SDLoc(N), - Chain); + std::pair<SDValue, SDValue> Tmp = + MakeLibCall(N, LC, NVT, Op, CallOptions, SDLoc(N), Chain); if (IsStrict) ReplaceValueWith(SDValue(N, 1), Tmp.second); return Tmp.first; @@ -189,9 +188,8 @@ EVT OpsVT[2] = { N->getOperand(0 + Offset).getValueType(), N->getOperand(1 + Offset).getValueType() }; CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Ops, - CallOptions, SDLoc(N), - Chain); + std::pair<SDValue, SDValue> Tmp = + MakeLibCall(N, LC, NVT, Ops, CallOptions, SDLoc(N), Chain); if (IsStrict) ReplaceValueWith(SDValue(N, 1), Tmp.second); return Tmp.first; @@ -454,14 +452,11 @@ N->getOperand(1 + Offset).getValueType(), N->getOperand(2 + Offset).getValueType() }; CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, - GetFPLibCall(N->getValueType(0), - RTLIB::FMA_F32, - RTLIB::FMA_F64, - RTLIB::FMA_F80, - RTLIB::FMA_F128, - RTLIB::FMA_PPCF128), - NVT, Ops, CallOptions, SDLoc(N), Chain); + std::pair<SDValue, SDValue> Tmp = MakeLibCall( + N, + GetFPLibCall(N->getValueType(0), RTLIB::FMA_F32, RTLIB::FMA_F64, + RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128), + NVT, Ops, CallOptions, SDLoc(N), Chain); if (IsStrict) ReplaceValueWith(SDValue(N, 1), Tmp.second); return Tmp.first; @@ -528,9 +523,8 @@ TargetLowering::MakeLibCallOptions CallOptions; EVT OpVT = N->getOperand(IsStrict ? 1 : 0).getValueType(); CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); - std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op, - CallOptions, SDLoc(N), - Chain); + std::pair<SDValue, SDValue> Tmp = + MakeLibCall(N, LC, NVT, Op, CallOptions, SDLoc(N), Chain); if (IsStrict) ReplaceValueWith(SDValue(N, 1), Tmp.second); return Tmp.first; @@ -544,15 +538,16 @@ TargetLowering::MakeLibCallOptions CallOptions; EVT OpsVT[1] = { N->getOperand(0).getValueType() }; CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, Op, - CallOptions, SDLoc(N)).first; + SDValue Res32 = + MakeLibCall(N, RTLIB::FPEXT_F16_F32, MidVT, Op, CallOptions, SDLoc(N)) + .first; if (N->getValueType(0) == MVT::f32) return Res32; EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); RTLIB::Libcall LC = RTLIB::getFPEXT(MVT::f32, N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); - return TLI.makeLibCall(DAG, LC, NVT, Res32, CallOptions, SDLoc(N)).first; + return MakeLibCall(N, LC, NVT, Res32, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { @@ -565,9 +560,8 @@ TargetLowering::MakeLibCallOptions CallOptions; EVT OpVT = N->getOperand(IsStrict ? 1 : 0).getValueType(); CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); - std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op, - CallOptions, SDLoc(N), - Chain); + std::pair<SDValue, SDValue> Tmp = + MakeLibCall(N, LC, NVT, Op, CallOptions, SDLoc(N), Chain); if (IsStrict) ReplaceValueWith(SDValue(N, 1), Tmp.second); return Tmp.first; @@ -613,11 +607,12 @@ EVT OpsVT[2] = { N->getOperand(0 + Offset).getValueType(), N->getOperand(1 + Offset).getValueType() }; CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Ops, - CallOptions, SDLoc(N), - Chain); + + auto Tmp = MakeLibCall(N, LC, NVT, Ops, CallOptions, SDLoc(N), Chain); + if (IsStrict) ReplaceValueWith(SDValue(N, 1), Tmp.second); + return Tmp.first; } @@ -794,8 +789,8 @@ CallOptions.setSExt(Signed); CallOptions.setTypeListBeforeSoften(SVT, RVT, true); std::pair<SDValue, SDValue> Tmp = - TLI.makeLibCall(DAG, LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), - Op, CallOptions, dl, Chain); + MakeLibCall(N, LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), Op, + CallOptions, dl, Chain); if (IsStrict) ReplaceValueWith(SDValue(N, 1), Tmp.second); @@ -907,9 +902,8 @@ Op = GetSoftenedFloat(Op); TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setTypeListBeforeSoften(SVT, RVT, true); - std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RVT, Op, - CallOptions, SDLoc(N), - Chain); + std::pair<SDValue, SDValue> Tmp = + TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N), Chain); if (IsStrict) { ReplaceValueWith(SDValue(N, 1), Tmp.second); ReplaceValueWith(SDValue(N, 0), Tmp.first); @@ -1304,9 +1298,8 @@ SDValue Op = N->getOperand(0 + Offset); SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); TargetLowering::MakeLibCallOptions CallOptions; - std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, N->getValueType(0), - Op, CallOptions, SDLoc(N), - Chain); + std::pair<SDValue, SDValue> Tmp = + MakeLibCall(N, LC, N->getValueType(0), Op, CallOptions, SDLoc(N), Chain); if (IsStrict) ReplaceValueWith(SDValue(N, 1), Tmp.second); GetPairElements(Tmp.first, Lo, Hi); @@ -1319,9 +1312,8 @@ SDValue Ops[] = { N->getOperand(0 + Offset), N->getOperand(1 + Offset) }; SDValue Chain = IsStrict ?
N->getOperand(0) : SDValue(); TargetLowering::MakeLibCallOptions CallOptions; - std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, N->getValueType(0), - Ops, CallOptions, SDLoc(N), - Chain); + std::pair<SDValue, SDValue> Tmp = + MakeLibCall(N, LC, N->getValueType(0), Ops, CallOptions, SDLoc(N), Chain); if (IsStrict) ReplaceValueWith(SDValue(N, 1), Tmp.second); GetPairElements(Tmp.first, Lo, Hi); @@ -1465,14 +1457,11 @@ N->getOperand(2 + Offset) }; SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); TargetLowering::MakeLibCallOptions CallOptions; - std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::FMA_F32, - RTLIB::FMA_F64, - RTLIB::FMA_F80, - RTLIB::FMA_F128, - RTLIB::FMA_PPCF128), - N->getValueType(0), Ops, CallOptions, - SDLoc(N), Chain); + std::pair<SDValue, SDValue> Tmp = MakeLibCall( + N, + GetFPLibCall(N->getValueType(0), RTLIB::FMA_F32, RTLIB::FMA_F64, + RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128), + N->getValueType(0), Ops, CallOptions, SDLoc(N), Chain); if (IsStrict) ReplaceValueWith(SDValue(N, 1), Tmp.second); GetPairElements(Tmp.first, Lo, Hi); @@ -1706,7 +1695,7 @@ TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setSExt(true); std::pair<SDValue, SDValue> Tmp = - TLI.makeLibCall(DAG, LC, VT, Src, CallOptions, dl, Chain); + MakeLibCall(N, LC, VT, Src, CallOptions, dl, Chain); if (Strict) Chain = Tmp.second; GetPairElements(Tmp.first, Lo, Hi);
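That is the last of the float-softening call sites; the integer-expansion call sites below get the same mechanical rewrite. In every case, whether the rewritten call site actually becomes a tail call depends only on where the libcall's result flows. A source-level illustration (f mirrors the fadd_d test further down; g is a hypothetical example, not part of this patch):

// Result feeds the return directly, so the libcall is in tail position:
double f(double a, double b) { return a + b; }        // tail __adddf3@plt
// The add feeds the multiply, so only the final libcall in the chain can
// be emitted as the tail call:
double g(double a, double b) { return (a + b) * b; }  // call __adddf3@plt,
                                                      // then tail __muldf3@plt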
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2597,8 +2597,8 @@ "Unexpected atomic op or value type!"); Ops.append(Node->op_begin() + 1, Node->op_end()); } - return TLI.makeLibCall(DAG, LC, RetVT, Ops, CallOptions, SDLoc(Node), - Node->getOperand(0)); + return MakeLibCall(Node, LC, RetVT, Ops, CallOptions, SDLoc(Node), + Node->getOperand(0)); } /// N is a shift by a value that needs to be expanded, @@ -3433,8 +3433,8 @@ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setSExt(true); - std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, VT, Op, - CallOptions, dl, Chain); + std::pair<SDValue, SDValue> Tmp = + MakeLibCall(N, LC, VT, Op, CallOptions, dl, Chain); SplitInteger(Tmp.first, Lo, Hi); if (IsStrict) @@ -3464,8 +3464,8 @@ RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); TargetLowering::MakeLibCallOptions CallOptions; - std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, VT, Op, - CallOptions, dl, Chain); + std::pair<SDValue, SDValue> Tmp = + MakeLibCall(N, LC, VT, Op, CallOptions, dl, Chain); SplitInteger(Tmp.first, Lo, Hi); if (IsStrict) @@ -3561,9 +3561,8 @@ TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setSExt(true); - std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT, - Op, CallOptions, dl, - Chain); + std::pair<SDValue, SDValue> Tmp = + MakeLibCall(N, LC, RetVT, Op, CallOptions, dl, Chain); SplitInteger(Tmp.first, Lo, Hi); if (N->isStrictFPOpcode()) @@ -3777,8 +3776,7 @@ SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setSExt(true); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, - Lo, Hi); + SplitInteger(MakeLibCall(N, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_READCYCLECOUNTER(SDNode *N, SDValue &Lo, @@ -4137,7 +4135,7 @@ TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setSExt(true); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); + SplitInteger(MakeLibCall(N, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_ShiftThroughStack(SDNode *N, SDValue &Lo, @@ -4360,7 +4358,7 @@ SDValue Ops[2] = {N->getOperand(0), ShAmt}; TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setSExt(isSigned); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); + SplitInteger(MakeLibCall(N, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); return; } @@ -4450,7 +4448,7 @@ TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setSExt(true); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); + SplitInteger(MakeLibCall(N, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, @@ -4643,7 +4641,7 @@ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!"); TargetLowering::MakeLibCallOptions CallOptions; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); + SplitInteger(MakeLibCall(N, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, @@ -4686,7 +4684,7 @@ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!"); TargetLowering::MakeLibCallOptions CallOptions; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); + SplitInteger(MakeLibCall(N, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -236,6 +236,14 @@ void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT, SDValue &Lo, SDValue &Hi); + /// Wrapper around TLI.makeLibCall that detects whether the libcall can be made a + /// tail call. If eligible to tail call, it will delete N and all of its uses. + /// Returns a pair of <result, chain>; both values are null if the libcall was emitted as a tail call. + std::pair<SDValue, SDValue> + MakeLibCall(SDNode *N, RTLIB::Libcall LC, EVT NVT, ArrayRef<SDValue> Ops, + TargetLowering::MakeLibCallOptions CallOptions, const SDLoc &dl, + SDValue Chain = SDValue()); + //===--------------------------------------------------------------------===// // Integer Promotion Support: LegalizeIntegerTypes.cpp //===--------------------------------------------------------------------===// diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -202,16 +202,6 @@ bool DAGTypeLegalizer::run() { bool Changed = false; - // Create a dummy node (which is not added to allnodes), that adds a reference - // to the root node, preventing it from being deleted, and tracking any - // changes of the root. - HandleSDNode Dummy(DAG.getRoot()); - Dummy.setNodeId(Unanalyzed); - - // The root of the dag may dangle to deleted nodes until the type legalizer is - // done. Set it to null to avoid confusion. - DAG.setRoot(SDValue()); - // Walk all nodes in the graph, assigning them a NodeId of 'ReadyToProcess' // (and remembering them) if they are leaves and assigning 'Unanalyzed' if // non-leaves. @@ -439,9 +429,6 @@ #endif PerformExpensiveChecks(); - // If the root changed (e.g. it was a dead load) update the root. - DAG.setRoot(Dummy.getValue()); - - // Remove dead nodes. This is important to do for cleanliness but also before // the checking loop below. Implicit folding by the DAG.getNode operators and // node morphing can cause unreachable nodes to be around with their flags set @@ -1050,6 +1037,55 @@ SplitInteger(Op, HalfVT, HalfVT, Lo, Hi); } +std::pair<SDValue, SDValue> +DAGTypeLegalizer::MakeLibCall(SDNode *N, RTLIB::Libcall LC, EVT NVT, + ArrayRef<SDValue> Ops, + TargetLowering::MakeLibCallOptions CallOptions, + const SDLoc &dl, SDValue Chain) { + SDValue &InChain = Chain; + bool IsTailCall = TLI.isInTailCallPosition(DAG, N, Chain); + + auto Res = TLI.makeLibCall(DAG, LC, NVT, Ops, CallOptions, dl, InChain, + IsTailCall); + if (!Res.second.getNode()) { + // TLI.makeLibCall successfully managed to make a tail call. It will have + // returned a pair of null nodes, and the DAG's root will have been set to + // the tail call. + // Res.first = DAG.getRoot(); + + LLVM_DEBUG(dbgs() << "LibCall is a tail call: "; DAG.getRoot().dump(&DAG)); + + // Unlike regular calls, tail calls don't have any CopyFromReg nodes to pass + // the result along. That means there's no result to store in + // ExpandedIntegers/SoftenedFloats etc., so when LegalizeTypes continues + // after N and analyzes its uses, it would complain when legalizing their + // operands. + // + // But the uses of N aren't actually needed anymore because we're doing a + // tail call. These will just be bitcasts and return nodes etc. that are no + // longer connected to the root node, so here we just delete them from the + // graph. + SmallVector<SDNode *, 8> Uses(1, N), ToDelete; + while (!Uses.empty()) { + SDNode *UN = Uses.pop_back_val(); + for (SDNode *Use : UN->uses()) + Uses.push_back(Use); + ToDelete.push_back(UN); + } + + for (auto It = ToDelete.rbegin(); It != ToDelete.rend(); It++) { + SDNode *ToDelete = *It; + if (ToDelete->getOpcode() == ISD::DELETED_NODE) + continue; + LLVM_DEBUG(dbgs() << "Deleting node: "; ToDelete->dump(&DAG)); + DAG.DeleteNode(ToDelete); + } + + // Then make sure to legalize the actual tail call itself. + AnalyzeNewNode(DAG.getRoot().getNode()); + } + return Res; +} //===----------------------------------------------------------------------===// // Entry Point
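To make the cleanup in MakeLibCall concrete, take fadd_d from the first test below: after softening, the __adddf3 result would normally be copied into the return register and returned. A rough picture, in illustrative shorthand rather than real DAG-dump syntax:

// Before: the root reaches N through the return sequence.
//   return-node <- CopyToReg(a0, ...) <- N (the softened fadd's libcall value)
// After TLI.makeLibCall emits the call as a tail call, the root is the
// target's tail-call node (RISCVISD::TAIL on RISC-V) and nothing reads N:
//   root = tail-call(__adddf3, args...)
// N, the CopyToReg, and the old return node are now unreachable from the
// root. The worklist above collects N plus its transitive users, and the
// reverse iteration deletes users before the nodes they use, so each node
// is use-free by the time DAG.DeleteNode runs; the DELETED_NODE check skips
// any node that was reached through more than one path.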
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -143,9 +143,8 @@ std::pair<SDValue, SDValue> TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef<SDValue> Ops, - MakeLibCallOptions CallOptions, - const SDLoc &dl, - SDValue InChain) const { + MakeLibCallOptions CallOptions, const SDLoc &dl, + SDValue InChain, bool IsTailCall) const { if (!InChain) InChain = DAG.getEntryNode(); @@ -189,6 +188,7 @@ .setNoReturn(CallOptions.DoesNotReturn) .setDiscardResult(!CallOptions.IsReturnValueUsed) .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization) + .setTailCall(IsTailCall) .setSExtResult(signExtend) .setZExtResult(zeroExtend); return LowerCallTo(CLI); diff --git a/llvm/test/CodeGen/RISCV/double-arith.ll b/llvm/test/CodeGen/RISCV/double-arith.ll --- a/llvm/test/CodeGen/RISCV/double-arith.ll +++ b/llvm/test/CodeGen/RISCV/double-arith.ll @@ -30,12 +30,7 @@ ; ; RV64I-LABEL: fadd_d: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call __adddf3@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail __adddf3@plt %1 = fadd double %a, %b ret
double %1 } @@ -57,12 +52,7 @@ ; ; RV64I-LABEL: fsub_d: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call __subdf3@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail __subdf3@plt %1 = fsub double %a, %b ret double %1 } @@ -84,12 +74,7 @@ ; ; RV64I-LABEL: fmul_d: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call __muldf3@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail __muldf3@plt %1 = fmul double %a, %b ret double %1 } @@ -111,12 +96,7 @@ ; ; RV64I-LABEL: fdiv_d: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call __divdf3@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail __divdf3@plt %1 = fdiv double %a, %b ret double %1 } @@ -140,12 +120,7 @@ ; ; RV64I-LABEL: fsqrt_d: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call sqrt@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail sqrt@plt %1 = call double @llvm.sqrt.f64(double %a) ret double %1 } @@ -293,10 +268,9 @@ ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: slli a0, a0, 1 ; RV64I-NEXT: srli a0, a0, 1 -; RV64I-NEXT: call __adddf3@plt ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail __adddf3@plt %1 = fadd double %a, %b %2 = call double @llvm.fabs.f64(double %1) %3 = fadd double %2, %1 @@ -322,12 +296,7 @@ ; ; RV64I-LABEL: fmin_d: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call fmin@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail fmin@plt %1 = call double @llvm.minnum.f64(double %a, double %b) ret double %1 } @@ -351,12 +320,7 @@ ; ; RV64I-LABEL: fmax_d: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call fmax@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail fmax@plt %1 = call double @llvm.maxnum.f64(double %a, double %b) ret double %1 } @@ -380,12 +344,7 @@ ; ; RV64I-LABEL: fmadd_d: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call fma@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail fma@plt %1 = call double @llvm.fma.f64(double %a, double %b, double %c) ret double %1 } @@ -454,12 +413,11 @@ ; RV64I-NEXT: xor a2, a0, a1 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a1, s0 -; RV64I-NEXT: call fma@plt ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 -; RV64I-NEXT: ret +; RV64I-NEXT: tail fma@plt %c_ = fadd double 0.0, %c ; avoid negation using xor %negc = fsub double -0.0, %c_ %1 = call double @llvm.fma.f64(double %a, double %b, double %negc) @@ -547,13 +505,12 @@ ; RV64I-NEXT: xor a2, a0, a2 ; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: mv a1, s1 -; RV64I-NEXT: call 
fma@plt ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 -; RV64I-NEXT: ret +; RV64I-NEXT: tail fma@plt %a_ = fadd double 0.0, %a %c_ = fadd double 0.0, %c %nega = fsub double -0.0, %a_ @@ -644,13 +601,12 @@ ; RV64I-NEXT: xor a1, s2, a2 ; RV64I-NEXT: xor a2, a0, a2 ; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: call fma@plt ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 -; RV64I-NEXT: ret +; RV64I-NEXT: tail fma@plt %b_ = fadd double 0.0, %b %c_ = fadd double 0.0, %c %negb = fsub double -0.0, %b_ @@ -787,12 +743,11 @@ ; RV64I-NEXT: xor a0, a0, a1 ; RV64I-NEXT: mv a1, s1 ; RV64I-NEXT: mv a2, s0 -; RV64I-NEXT: call fma@plt ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 -; RV64I-NEXT: ret +; RV64I-NEXT: tail fma@plt %a_ = fadd double 0.0, %a %nega = fsub double -0.0, %a_ %1 = call double @llvm.fma.f64(double %nega, double %b, double %c) @@ -863,12 +818,11 @@ ; RV64I-NEXT: xor a1, a0, a1 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: mv a2, s0 -; RV64I-NEXT: call fma@plt ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 -; RV64I-NEXT: ret +; RV64I-NEXT: tail fma@plt %b_ = fadd double 0.0, %b %negb = fsub double -0.0, %b_ %1 = call double @llvm.fma.f64(double %a, double %negb, double %c) @@ -907,11 +861,10 @@ ; RV64I-NEXT: mv s0, a2 ; RV64I-NEXT: call __muldf3@plt ; RV64I-NEXT: mv a1, s0 -; RV64I-NEXT: call __adddf3@plt ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail __adddf3@plt %1 = fmul contract double %a, %b %2 = fadd contract double %1, %c ret double %2 @@ -988,13 +941,12 @@ ; RV64I-NEXT: mv a1, s0 ; RV64I-NEXT: call __muldf3@plt ; RV64I-NEXT: mv a1, s2 -; RV64I-NEXT: call __subdf3@plt ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 -; RV64I-NEXT: ret +; RV64I-NEXT: tail __subdf3@plt %c_ = fadd double 0.0, %c ; avoid negation using xor %1 = fmul contract double %a, %b %2 = fsub contract double %1, %c_ @@ -1100,13 +1052,12 @@ ; RV64I-NEXT: slli a1, a1, 63 ; RV64I-NEXT: xor a0, a0, a1 ; RV64I-NEXT: mv a1, s0 -; RV64I-NEXT: call __subdf3@plt ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 -; RV64I-NEXT: ret +; RV64I-NEXT: tail __subdf3@plt %a_ = fadd double 0.0, %a ; avoid negation using xor %b_ = fadd double 0.0, %b ; avoid negation using xor %c_ = fadd double 0.0, %c ; avoid negation using xor @@ -1197,13 +1148,12 @@ ; RV64I-NEXT: call __muldf3@plt ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: mv a0, s0 -; RV64I-NEXT: call __subdf3@plt ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) 
# 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 -; RV64I-NEXT: ret +; RV64I-NEXT: tail __subdf3@plt %a_ = fadd double 0.0, %a ; avoid negation using xor %b_ = fadd double 0.0, %b ; avoid negation using xor %1 = fmul contract double %a_, %b_ diff --git a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll --- a/llvm/test/CodeGen/RISCV/double-convert.ll +++ b/llvm/test/CodeGen/RISCV/double-convert.ll @@ -16,12 +16,7 @@ ; ; RV32I-LABEL: fcvt_s_d: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: call __truncdfsf2@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret +; RV32I-NEXT: tail __truncdfsf2@plt ; ; RV64I-LABEL: fcvt_s_d: ; RV64I: # %bb.0: @@ -52,12 +47,7 @@ ; ; RV64I-LABEL: fcvt_d_s: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call __extendsfdf2@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail __extendsfdf2@plt %1 = fpext float %a to double ret double %1 } @@ -389,13 +379,8 @@ ; ; RV64I-LABEL: fcvt_d_w: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sext.w a0, a0 -; RV64I-NEXT: call __floatsidf@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail __floatsidf@plt %1 = sitofp i32 %a to double ret double %1 } @@ -419,13 +404,8 @@ ; ; RV64I-LABEL: fcvt_d_w_load: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: lw a0, 0(a0) -; RV64I-NEXT: call __floatsidf@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail __floatsidf@plt %a = load i32, ptr %p %1 = sitofp i32 %a to double ret double %1 @@ -448,13 +428,8 @@ ; ; RV64I-LABEL: fcvt_d_wu: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sext.w a0, a0 -; RV64I-NEXT: call __floatunsidf@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail __floatunsidf@plt %1 = uitofp i32 %a to double ret double %1 } @@ -484,13 +459,8 @@ ; ; RV64I-LABEL: fcvt_d_wu_load: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: lw a0, 0(a0) -; RV64I-NEXT: call __floatunsidf@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail __floatunsidf@plt %a = load i32, ptr %p %1 = uitofp i32 %a to double ret double %1 @@ -903,12 +873,7 @@ ; ; RV64I-LABEL: fmv_x_d: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call __adddf3@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail __adddf3@plt %1 = fadd double %a, %b %2 = bitcast double %1 to i64 ret i64 %2 @@ -940,12 +905,7 @@ ; ; RV64I-LABEL: fcvt_d_l: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call __floatdidf@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 
16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail __floatdidf@plt %1 = sitofp i64 %a to double ret double %1 } @@ -976,12 +936,7 @@ ; ; RV64I-LABEL: fcvt_d_lu: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call __floatundidf@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail __floatundidf@plt %1 = uitofp i64 %a to double ret double %1 } @@ -1019,12 +974,7 @@ ; ; RV64I-LABEL: fmv_d_x: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call __adddf3@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail __adddf3@plt %1 = bitcast i64 %a to double %2 = bitcast i64 %b to double %3 = fadd double %1, %2 @@ -1048,12 +998,7 @@ ; ; RV64I-LABEL: fcvt_d_w_i8: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call __floatsidf@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail __floatsidf@plt %1 = sitofp i8 %a to double ret double %1 } @@ -1075,12 +1020,7 @@ ; ; RV64I-LABEL: fcvt_d_wu_i8: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call __floatunsidf@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail __floatunsidf@plt %1 = uitofp i8 %a to double ret double %1 } @@ -1102,12 +1042,7 @@ ; ; RV64I-LABEL: fcvt_d_w_i16: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call __floatsidf@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail __floatsidf@plt %1 = sitofp i16 %a to double ret double %1 } @@ -1129,12 +1064,7 @@ ; ; RV64I-LABEL: fcvt_d_wu_i16: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call __floatunsidf@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail __floatunsidf@plt %1 = uitofp i16 %a to double ret double %1 } diff --git a/llvm/test/CodeGen/RISCV/double-intrinsics-strict.ll b/llvm/test/CodeGen/RISCV/double-intrinsics-strict.ll --- a/llvm/test/CodeGen/RISCV/double-intrinsics-strict.ll +++ b/llvm/test/CodeGen/RISCV/double-intrinsics-strict.ll @@ -249,12 +249,11 @@ ; RV64I-NEXT: call cos@plt ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: call __adddf3@plt ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 -; RV64I-NEXT: ret +; RV64I-NEXT: tail __adddf3@plt %1 = call double @llvm.experimental.constrained.sin.f64(double %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp %2 = call double @llvm.experimental.constrained.cos.f64(double %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp %3 = fadd double %1, %2 diff --git a/llvm/test/CodeGen/RISCV/double-intrinsics.ll b/llvm/test/CodeGen/RISCV/double-intrinsics.ll --- a/llvm/test/CodeGen/RISCV/double-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/double-intrinsics.ll @@ -29,12 +29,7 @@ ; ; RV64I-LABEL: sqrt_f64: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, 
-16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call sqrt@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail sqrt@plt %1 = call double @llvm.sqrt.f64(double %a) ret double %1 } @@ -67,13 +62,8 @@ ; ; RV64I-LABEL: powi_f64: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sext.w a1, a1 -; RV64I-NEXT: call __powidf2@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail __powidf2@plt %1 = call double @llvm.powi.f64.i32(double %a, i32 %b) ret double %1 } @@ -96,12 +86,7 @@ ; ; RV64I-LABEL: sin_f64: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call sin@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail sin@plt %1 = call double @llvm.sin.f64(double %a) ret double %1 } @@ -124,12 +109,7 @@ ; ; RV64I-LABEL: cos_f64: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call cos@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail cos@plt %1 = call double @llvm.cos.f64(double %a) ret double %1 } @@ -214,12 +194,11 @@ ; RV64I-NEXT: call cos@plt ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: call __adddf3@plt ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 -; RV64I-NEXT: ret +; RV64I-NEXT: tail __adddf3@plt %1 = call double @llvm.sin.f64(double %a) %2 = call double @llvm.cos.f64(double %a) %3 = fadd double %1, %2 @@ -244,12 +223,7 @@ ; ; RV64I-LABEL: pow_f64: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call pow@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail pow@plt %1 = call double @llvm.pow.f64(double %a, double %b) ret double %1 } @@ -272,12 +246,7 @@ ; ; RV64I-LABEL: exp_f64: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call exp@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail exp@plt %1 = call double @llvm.exp.f64(double %a) ret double %1 } @@ -300,12 +269,7 @@ ; ; RV64I-LABEL: exp2_f64: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call exp2@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail exp2@plt %1 = call double @llvm.exp2.f64(double %a) ret double %1 } @@ -328,12 +292,7 @@ ; ; RV64I-LABEL: log_f64: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call log@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail log@plt %1 = call double @llvm.log.f64(double %a) ret double %1 } @@ -356,12 +315,7 @@ ; ; RV64I-LABEL: log10_f64: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call log10@plt -; RV64I-NEXT: ld ra, 8(sp) # 
8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail log10@plt %1 = call double @llvm.log10.f64(double %a) ret double %1 } @@ -384,12 +338,7 @@ ; ; RV64I-LABEL: log2_f64: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call log2@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail log2@plt %1 = call double @llvm.log2.f64(double %a) ret double %1 } @@ -413,12 +362,7 @@ ; ; RV64I-LABEL: fma_f64: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call fma@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail fma@plt %1 = call double @llvm.fma.f64(double %a, double %b, double %c) ret double %1 } @@ -457,11 +401,10 @@ ; RV64I-NEXT: mv s0, a2 ; RV64I-NEXT: call __muldf3@plt ; RV64I-NEXT: mv a1, s0 -; RV64I-NEXT: call __adddf3@plt ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail __adddf3@plt %1 = call double @llvm.fmuladd.f64(double %a, double %b, double %c) ret double %1 } @@ -508,12 +451,7 @@ ; ; RV64I-LABEL: minnum_f64: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call fmin@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail fmin@plt %1 = call double @llvm.minnum.f64(double %a, double %b) ret double %1 } @@ -537,12 +475,7 @@ ; ; RV64I-LABEL: maxnum_f64: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call fmax@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail fmax@plt %1 = call double @llvm.maxnum.f64(double %a, double %b) ret double %1 } @@ -625,12 +558,7 @@ ; ; RV64I-LABEL: floor_f64: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call floor@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail floor@plt %1 = call double @llvm.floor.f64(double %a) ret double %1 } @@ -667,12 +595,7 @@ ; ; RV64I-LABEL: ceil_f64: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call ceil@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail ceil@plt %1 = call double @llvm.ceil.f64(double %a) ret double %1 } @@ -709,12 +632,7 @@ ; ; RV64I-LABEL: trunc_f64: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call trunc@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail trunc@plt %1 = call double @llvm.trunc.f64(double %a) ret double %1 } @@ -751,12 +669,7 @@ ; ; RV64I-LABEL: rint_f64: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call rint@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail rint@plt %1 = call double @llvm.rint.f64(double %a) ret double %1 } @@ 
-779,12 +692,7 @@ ; ; RV64I-LABEL: nearbyint_f64: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call nearbyint@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail nearbyint@plt %1 = call double @llvm.nearbyint.f64(double %a) ret double %1 } @@ -821,12 +729,7 @@ ; ; RV64I-LABEL: round_f64: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call round@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail round@plt %1 = call double @llvm.round.f64(double %a) ret double %1 } @@ -863,12 +766,7 @@ ; ; RV64I-LABEL: roundeven_f64: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: call roundeven@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: ret +; RV64I-NEXT: tail roundeven@plt %1 = call double @llvm.roundeven.f64(double %a) ret double %1 } diff --git a/llvm/test/CodeGen/RISCV/float-arith.ll b/llvm/test/CodeGen/RISCV/float-arith.ll --- a/llvm/test/CodeGen/RISCV/float-arith.ll +++ b/llvm/test/CodeGen/RISCV/float-arith.ll @@ -21,12 +21,7 @@ ; ; RV32I-LABEL: fadd_s: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: call __addsf3@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret +; RV32I-NEXT: tail __addsf3@plt ; ; RV64I-LABEL: fadd_s: ; RV64I: # %bb.0: @@ -48,12 +43,7 @@ ; ; RV32I-LABEL: fsub_s: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: call __subsf3@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret +; RV32I-NEXT: tail __subsf3@plt ; ; RV64I-LABEL: fsub_s: ; RV64I: # %bb.0: @@ -75,12 +65,7 @@ ; ; RV32I-LABEL: fmul_s: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: call __mulsf3@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret +; RV32I-NEXT: tail __mulsf3@plt ; ; RV64I-LABEL: fmul_s: ; RV64I: # %bb.0: @@ -102,12 +87,7 @@ ; ; RV32I-LABEL: fdiv_s: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: call __divsf3@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret +; RV32I-NEXT: tail __divsf3@plt ; ; RV64I-LABEL: fdiv_s: ; RV64I: # %bb.0: @@ -131,12 +111,7 @@ ; ; RV32I-LABEL: fsqrt_s: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: call sqrtf@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret +; RV32I-NEXT: tail sqrtf@plt ; ; RV64I-LABEL: fsqrt_s: ; RV64I: # %bb.0: @@ -287,10 +262,9 @@ ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: slli a0, a0, 1 ; RV32I-NEXT: srli a0, a0, 1 -; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret +; RV32I-NEXT: tail __addsf3@plt ; ; RV64I-LABEL: fabs_s: ; RV64I: # %bb.0: @@ -320,12 +294,7 @@ ; ; RV32I-LABEL: fmin_s: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded 
Spill -; RV32I-NEXT: call fminf@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret +; RV32I-NEXT: tail fminf@plt ; ; RV64I-LABEL: fmin_s: ; RV64I: # %bb.0: @@ -349,12 +318,7 @@ ; ; RV32I-LABEL: fmax_s: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: call fmaxf@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret +; RV32I-NEXT: tail fmaxf@plt ; ; RV64I-LABEL: fmax_s: ; RV64I: # %bb.0: @@ -378,12 +342,7 @@ ; ; RV32I-LABEL: fmadd_s: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: call fmaf@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret +; RV32I-NEXT: tail fmaf@plt ; ; RV64I-LABEL: fmadd_s: ; RV64I: # %bb.0: @@ -420,12 +379,11 @@ ; RV32I-NEXT: xor a2, a0, a2 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s0 -; RV32I-NEXT: call fmaf@plt ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret +; RV32I-NEXT: tail fmaf@plt ; ; RV64I-LABEL: fmsub_s: ; RV64I: # %bb.0: @@ -483,13 +441,12 @@ ; RV32I-NEXT: xor a2, a0, a2 ; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: call fmaf@plt ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret +; RV32I-NEXT: tail fmaf@plt ; ; RV64I-LABEL: fnmadd_s: ; RV64I: # %bb.0: @@ -555,13 +512,12 @@ ; RV32I-NEXT: xor a1, s2, a2 ; RV32I-NEXT: xor a2, a0, a2 ; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: call fmaf@plt ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret +; RV32I-NEXT: tail fmaf@plt ; ; RV64I-LABEL: fnmadd_s_2: ; RV64I: # %bb.0: @@ -707,12 +663,11 @@ ; RV32I-NEXT: xor a0, a0, a1 ; RV32I-NEXT: mv a1, s1 ; RV32I-NEXT: mv a2, s0 -; RV32I-NEXT: call fmaf@plt ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret +; RV32I-NEXT: tail fmaf@plt ; ; RV64I-LABEL: fnmsub_s: ; RV64I: # %bb.0: @@ -763,12 +718,11 @@ ; RV32I-NEXT: xor a1, a0, a1 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a2, s0 -; RV32I-NEXT: call fmaf@plt ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret +; RV32I-NEXT: tail fmaf@plt ; ; RV64I-LABEL: fnmsub_s_2: ; RV64I: # %bb.0: @@ -811,11 +765,10 @@ ; RV32I-NEXT: mv s0, a2 ; RV32I-NEXT: call __mulsf3@plt ; RV32I-NEXT: mv a1, s0 -; RV32I-NEXT: call __addsf3@plt ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret +; RV32I-NEXT: tail __addsf3@plt ; ; RV64I-LABEL: fmadd_s_contract: ; RV64I: # %bb.0: @@ -860,13 +813,12 @@ ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __mulsf3@plt ; RV32I-NEXT: mv a1, s2 -; RV32I-NEXT: call __subsf3@plt ; RV32I-NEXT: 
lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret +; RV32I-NEXT: tail __subsf3@plt ; ; RV64I-LABEL: fmsub_s_contract: ; RV64I: # %bb.0: @@ -934,13 +886,12 @@ ; RV32I-NEXT: lui a1, 524288 ; RV32I-NEXT: xor a0, a0, a1 ; RV32I-NEXT: mv a1, s0 -; RV32I-NEXT: call __subsf3@plt ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret +; RV32I-NEXT: tail __subsf3@plt ; ; RV64I-LABEL: fnmadd_s_contract: ; RV64I: # %bb.0: @@ -1013,13 +964,12 @@ ; RV32I-NEXT: call __mulsf3@plt ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: call __subsf3@plt ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret +; RV32I-NEXT: tail __subsf3@plt ; ; RV64I-LABEL: fnmsub_s_contract: ; RV64I: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/float-bit-preserving-dagcombines.ll b/llvm/test/CodeGen/RISCV/float-bit-preserving-dagcombines.ll --- a/llvm/test/CodeGen/RISCV/float-bit-preserving-dagcombines.ll +++ b/llvm/test/CodeGen/RISCV/float-bit-preserving-dagcombines.ll @@ -112,11 +112,10 @@ ; RV64F-NEXT: slli a0, a0, 1 ; RV64F-NEXT: srli a1, a0, 1 ; RV64F-NEXT: mv a0, s0 -; RV64F-NEXT: call __adddf3@plt ; RV64F-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64F-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; RV64F-NEXT: addi sp, sp, 16 -; RV64F-NEXT: ret +; RV64F-NEXT: tail __adddf3@plt ; ; RV64FD-LABEL: bitcast_double_and: ; RV64FD: # %bb.0: @@ -235,11 +234,10 @@ ; RV64F-NEXT: slli a1, a1, 63 ; RV64F-NEXT: xor a1, a0, a1 ; RV64F-NEXT: mv a0, s0 -; RV64F-NEXT: call __muldf3@plt ; RV64F-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64F-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; RV64F-NEXT: addi sp, sp, 16 -; RV64F-NEXT: ret +; RV64F-NEXT: tail __muldf3@plt ; ; RV64FD-LABEL: bitcast_double_xor: ; RV64FD: # %bb.0: @@ -362,11 +360,10 @@ ; RV64F-NEXT: slli a1, a1, 63 ; RV64F-NEXT: or a1, a0, a1 ; RV64F-NEXT: mv a0, s0 -; RV64F-NEXT: call __muldf3@plt ; RV64F-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64F-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; RV64F-NEXT: addi sp, sp, 16 -; RV64F-NEXT: ret +; RV64F-NEXT: tail __muldf3@plt ; ; RV64FD-LABEL: bitcast_double_or: ; RV64FD: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/float-convert.ll b/llvm/test/CodeGen/RISCV/float-convert.ll --- a/llvm/test/CodeGen/RISCV/float-convert.ll +++ b/llvm/test/CodeGen/RISCV/float-convert.ll @@ -301,12 +301,7 @@ ; ; RV32I-LABEL: fmv_x_w: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: call __addsf3@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret +; RV32I-NEXT: tail __addsf3@plt ; ; RV64I-LABEL: fmv_x_w: ; RV64I: # %bb.0: @@ -330,12 +325,7 @@ ; ; RV32I-LABEL: fcvt_s_w: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: call __floatsisf@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret +; RV32I-NEXT: tail __floatsisf@plt ; ; RV64I-LABEL: 
fcvt_s_w: ; RV64I: # %bb.0: @@ -359,13 +349,8 @@ ; ; RV32I-LABEL: fcvt_s_w_load: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: lw a0, 0(a0) -; RV32I-NEXT: call __floatsisf@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret +; RV32I-NEXT: tail __floatsisf@plt ; ; RV64I-LABEL: fcvt_s_w_load: ; RV64I: # %bb.0: @@ -389,12 +374,7 @@ ; ; RV32I-LABEL: fcvt_s_wu: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: call __floatunsisf@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret +; RV32I-NEXT: tail __floatunsisf@plt ; ; RV64I-LABEL: fcvt_s_wu: ; RV64I: # %bb.0: @@ -424,13 +404,8 @@ ; ; RV32I-LABEL: fcvt_s_wu_load: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: lw a0, 0(a0) -; RV32I-NEXT: call __floatunsisf@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret +; RV32I-NEXT: tail __floatunsisf@plt ; ; RV64I-LABEL: fcvt_s_wu_load: ; RV64I: # %bb.0: @@ -456,12 +431,7 @@ ; ; RV32I-LABEL: fmv_w_x: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: call __addsf3@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret +; RV32I-NEXT: tail __addsf3@plt ; ; RV64I-LABEL: fmv_w_x: ; RV64I: # %bb.0: @@ -840,12 +810,7 @@ ; ; RV32I-LABEL: fcvt_s_l: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: call __floatdisf@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret +; RV32I-NEXT: tail __floatdisf@plt ; ; RV64I-LABEL: fcvt_s_l: ; RV64I: # %bb.0: @@ -876,12 +841,7 @@ ; ; RV32I-LABEL: fcvt_s_lu: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: call __floatundisf@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret +; RV32I-NEXT: tail __floatundisf@plt ; ; RV64I-LABEL: fcvt_s_lu: ; RV64I: # %bb.0: @@ -903,12 +863,7 @@ ; ; RV32I-LABEL: fcvt_s_w_i8: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: call __floatsisf@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret +; RV32I-NEXT: tail __floatsisf@plt ; ; RV64I-LABEL: fcvt_s_w_i8: ; RV64I: # %bb.0: @@ -930,12 +885,7 @@ ; ; RV32I-LABEL: fcvt_s_wu_i8: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: call __floatunsisf@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret +; RV32I-NEXT: tail __floatunsisf@plt ; ; RV64I-LABEL: fcvt_s_wu_i8: ; RV64I: # %bb.0: @@ -957,12 +907,7 @@ ; ; RV32I-LABEL: fcvt_s_w_i16: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: call __floatsisf@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 -; RV32I-NEXT: ret +; RV32I-NEXT: tail __floatsisf@plt ; ; RV64I-LABEL: fcvt_s_w_i16: ; RV64I: # %bb.0: @@ -984,12 +929,7 @@ ; ; RV32I-LABEL: fcvt_s_wu_i16: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: call __floatunsisf@plt
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: ret
+; RV32I-NEXT: tail __floatunsisf@plt
 ;
 ; RV64I-LABEL: fcvt_s_wu_i16:
 ; RV64I: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/float-frem.ll b/llvm/test/CodeGen/RISCV/float-frem.ll
--- a/llvm/test/CodeGen/RISCV/float-frem.ll
+++ b/llvm/test/CodeGen/RISCV/float-frem.ll
@@ -24,12 +24,7 @@
 ;
 ; RV32I-LABEL: frem_f32:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: call fmodf@plt
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: ret
+; RV32I-NEXT: tail fmodf@plt
 ;
 ; RV64I-LABEL: frem_f32:
 ; RV64I: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/float-intrinsics-strict.ll b/llvm/test/CodeGen/RISCV/float-intrinsics-strict.ll
--- a/llvm/test/CodeGen/RISCV/float-intrinsics-strict.ll
+++ b/llvm/test/CodeGen/RISCV/float-intrinsics-strict.ll
@@ -220,12 +220,11 @@
 ; RV32I-NEXT: call cosf@plt
 ; RV32I-NEXT: mv a1, a0
 ; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __addsf3@plt
 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: ret
+; RV32I-NEXT: tail __addsf3@plt
 ;
 ; RV64I-LABEL: sincos_f32:
 ; RV64I: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/float-intrinsics.ll b/llvm/test/CodeGen/RISCV/float-intrinsics.ll
--- a/llvm/test/CodeGen/RISCV/float-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/float-intrinsics.ll
@@ -31,12 +31,7 @@
 ;
 ; RV32I-LABEL: sqrt_f32:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: call sqrtf@plt
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: ret
+; RV32I-NEXT: tail sqrtf@plt
 ;
 ; RV64I-LABEL: sqrt_f32:
 ; RV64I: # %bb.0:
@@ -69,12 +64,7 @@
 ;
 ; RV32I-LABEL: powi_f32:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: call __powisf2@plt
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: ret
+; RV32I-NEXT: tail __powisf2@plt
 ;
 ; RV64I-LABEL: powi_f32:
 ; RV64I: # %bb.0:
@@ -102,12 +92,7 @@
 ;
 ; RV32I-LABEL: sin_f32:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: call sinf@plt
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: ret
+; RV32I-NEXT: tail sinf@plt
 ;
 ; RV64I-LABEL: sin_f32:
 ; RV64I: # %bb.0:
@@ -134,12 +119,7 @@
 ;
 ; RV32I-LABEL: cos_f32:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: call cosf@plt
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: ret
+; RV32I-NEXT: tail cosf@plt
 ;
 ; RV64I-LABEL: cos_f32:
 ; RV64I: # %bb.0:
@@ -186,12 +166,11 @@
 ; RV32I-NEXT: call cosf@plt
 ; RV32I-NEXT: mv a1, a0
 ; RV32I-NEXT: mv a0, s1
-; RV32I-NEXT: call __addsf3@plt
 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: ret
+; RV32I-NEXT: tail __addsf3@plt
 ;
 ; RV64I-LABEL: sincos_f32:
 ; RV64I: # %bb.0:
@@ -231,12 +210,7 @@
 ;
 ; RV32I-LABEL: pow_f32:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: call powf@plt
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: ret
+; RV32I-NEXT: tail powf@plt
 ;
 ; RV64I-LABEL: pow_f32:
 ; RV64I: # %bb.0:
@@ -263,12 +237,7 @@
 ;
 ; RV32I-LABEL: exp_f32:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: call expf@plt
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: ret
+; RV32I-NEXT: tail expf@plt
 ;
 ; RV64I-LABEL: exp_f32:
 ; RV64I: # %bb.0:
@@ -295,12 +264,7 @@
 ;
 ; RV32I-LABEL: exp2_f32:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: call exp2f@plt
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: ret
+; RV32I-NEXT: tail exp2f@plt
 ;
 ; RV64I-LABEL: exp2_f32:
 ; RV64I: # %bb.0:
@@ -327,12 +291,7 @@
 ;
 ; RV32I-LABEL: log_f32:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: call logf@plt
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: ret
+; RV32I-NEXT: tail logf@plt
 ;
 ; RV64I-LABEL: log_f32:
 ; RV64I: # %bb.0:
@@ -359,12 +318,7 @@
 ;
 ; RV32I-LABEL: log10_f32:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: call log10f@plt
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: ret
+; RV32I-NEXT: tail log10f@plt
 ;
 ; RV64I-LABEL: log10_f32:
 ; RV64I: # %bb.0:
@@ -391,12 +345,7 @@
 ;
 ; RV32I-LABEL: log2_f32:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: call log2f@plt
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: ret
+; RV32I-NEXT: tail log2f@plt
 ;
 ; RV64I-LABEL: log2_f32:
 ; RV64I: # %bb.0:
@@ -425,12 +374,7 @@
 ;
 ; RV32I-LABEL: fma_f32:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: call fmaf@plt
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: ret
+; RV32I-NEXT: tail fmaf@plt
 ;
 ; RV64I-LABEL: fma_f32:
 ; RV64I: # %bb.0:
@@ -465,11 +409,10 @@
 ; RV32I-NEXT: mv s0, a2
 ; RV32I-NEXT: call __mulsf3@plt
 ; RV32I-NEXT: mv a1, s0
-; RV32I-NEXT: call __addsf3@plt
 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: ret
+; RV32I-NEXT: tail __addsf3@plt
 ;
 ; RV64I-LABEL: fmuladd_f32:
 ; RV64I: # %bb.0:
@@ -531,12 +474,7 @@
 ;
 ; RV32I-LABEL: minnum_f32:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: call fminf@plt
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: ret
+; RV32I-NEXT: tail fminf@plt
 ;
 ; RV64I-LABEL: minnum_f32:
 ; RV64I: # %bb.0:
@@ -565,12 +503,7 @@
 ;
 ; RV32I-LABEL: maxnum_f32:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: call fmaxf@plt
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: ret
+; RV32I-NEXT: tail fmaxf@plt
 ;
 ; RV64I-LABEL: maxnum_f32:
 ; RV64I: # %bb.0:
@@ -668,12 +601,7 @@
 ;
 ; RV32I-LABEL: floor_f32:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: call floorf@plt
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: ret
+; RV32I-NEXT: tail floorf@plt
 ;
 ; RV64I-LABEL: floor_f32:
 ; RV64I: # %bb.0:
@@ -720,12 +648,7 @@
 ;
 ; RV32I-LABEL: ceil_f32:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: call ceilf@plt
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: ret
+; RV32I-NEXT: tail ceilf@plt
 ;
 ; RV64I-LABEL: ceil_f32:
 ; RV64I: # %bb.0:
@@ -772,12 +695,7 @@
 ;
 ; RV32I-LABEL: trunc_f32:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: call truncf@plt
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: ret
+; RV32I-NEXT: tail truncf@plt
 ;
 ; RV64I-LABEL: trunc_f32:
 ; RV64I: # %bb.0:
@@ -824,12 +742,7 @@
 ;
 ; RV32I-LABEL: rint_f32:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: call rintf@plt
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: ret
+; RV32I-NEXT: tail rintf@plt
 ;
 ; RV64I-LABEL: rint_f32:
 ; RV64I: # %bb.0:
@@ -856,12 +769,7 @@
 ;
 ; RV32I-LABEL: nearbyint_f32:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: call nearbyintf@plt
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: ret
+; RV32I-NEXT: tail nearbyintf@plt
 ;
 ; RV64I-LABEL: nearbyint_f32:
 ; RV64I: # %bb.0:
@@ -908,12 +816,7 @@
 ;
 ; RV32I-LABEL: round_f32:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: call roundf@plt
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: ret
+; RV32I-NEXT: tail roundf@plt
 ;
 ; RV64I-LABEL: round_f32:
 ; RV64I: # %bb.0:
@@ -960,12 +863,7 @@
 ;
 ; RV32I-LABEL: roundeven_f32:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT: call roundevenf@plt
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: ret
+; RV32I-NEXT: tail roundevenf@plt
 ;
 ; RV64I-LABEL: roundeven_f32:
 ; RV64I: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/fmax-fmin.ll b/llvm/test/CodeGen/RISCV/fmax-fmin.ll
--- a/llvm/test/CodeGen/RISCV/fmax-fmin.ll
+++ b/llvm/test/CodeGen/RISCV/fmax-fmin.ll
@@ -5,12 +5,7 @@
 define float @maxnum_f32(float %x, float %y) nounwind {
 ; R32-LABEL: maxnum_f32:
 ; R32: # %bb.0:
-; R32-NEXT: addi sp, sp, -16
-; R32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; R32-NEXT: call fmaxf@plt
-; R32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; R32-NEXT: addi sp, sp, 16
-; R32-NEXT: ret
+; R32-NEXT: tail fmaxf@plt
 ;
 ; R64-LABEL: maxnum_f32:
 ; R64: # %bb.0:
@@ -80,12 +75,7 @@
 ;
 ; R64-LABEL: maxnum_f64:
 ; R64: # %bb.0:
-; R64-NEXT: addi sp, sp, -16
-; R64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; R64-NEXT: call fmax@plt
-; R64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; R64-NEXT: addi sp, sp, 16
-; R64-NEXT: ret
+; R64-NEXT: tail fmax@plt
 %r = call double @llvm.maxnum.f64(double %x, double %y)
 ret double %r
 }
@@ -156,12 +146,7 @@
 define float @minnum_f32(float %x, float %y) nounwind {
 ; R32-LABEL: minnum_f32:
 ; R32: # %bb.0:
-; R32-NEXT: addi sp, sp, -16
-; R32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; R32-NEXT: call fminf@plt
-; R32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; R32-NEXT: addi sp, sp, 16
-; R32-NEXT: ret
+; R32-NEXT: tail fminf@plt
 ;
 ; R64-LABEL: minnum_f32:
 ; R64: # %bb.0:
@@ -231,12 +216,7 @@
 ;
 ; R64-LABEL: minnum_f64:
 ; R64: # %bb.0:
-; R64-NEXT: addi sp, sp, -16
-; R64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; R64-NEXT: call fmin@plt
-; R64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; R64-NEXT: addi sp, sp, 16
-; R64-NEXT: ret
+; R64-NEXT: tail fmin@plt
 %r = call double @llvm.minnum.f64(double %x, double %y)
 ret double %r
 }
diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll
--- a/llvm/test/CodeGen/RISCV/half-convert.ll
+++ b/llvm/test/CodeGen/RISCV/half-convert.ll
@@ -2267,14 +2267,9 @@
 ;
 ; RV32I-LABEL: fcvt_s_h:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -16
-; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT: slli a0, a0, 16
 ; RV32I-NEXT: srli a0, a0, 16
-; RV32I-NEXT: call __extendhfsf2@plt
-; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT: addi sp, sp, 16
-; RV32I-NEXT: ret
+; RV32I-NEXT: tail __extendhfsf2@plt
 ;
 ; RV64I-LABEL: fcvt_s_h:
 ; RV64I: # %bb.0:
@@ -2391,13 +2386,8 @@
 ;
 ; RV64IZFH-LABEL: fcvt_d_h:
 ; RV64IZFH: # %bb.0:
-; RV64IZFH-NEXT: addi sp, sp, -16
-; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64IZFH-NEXT: fcvt.s.h fa0, fa0
-; RV64IZFH-NEXT: call __extendsfdf2@plt
-; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64IZFH-NEXT: addi sp, sp, 16
-; RV64IZFH-NEXT: ret
+; RV64IZFH-NEXT: tail __extendsfdf2@plt
 ;
 ; RV32IDZFH-LABEL: fcvt_d_h:
 ; RV32IDZFH: # %bb.0:
@@ -2430,10 +2420,9 @@
 ; RV64I-NEXT: call __extendhfsf2@plt
 ; RV64I-NEXT: slli a0, a0, 32
 ; RV64I-NEXT: srli a0, a0, 32
-; RV64I-NEXT: call __extendsfdf2@plt
 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT: addi sp, sp, 16
-; RV64I-NEXT: ret
+; RV64I-NEXT: tail __extendsfdf2@plt
 ;
 ; RV32IFZFHMIN-LABEL: fcvt_d_h:
 ; RV32IFZFHMIN: # %bb.0:
@@ -2447,13 +2436,8 @@
 ;
 ; RV64IFZFHMIN-LABEL: fcvt_d_h:
 ; RV64IFZFHMIN: # %bb.0:
-; RV64IFZFHMIN-NEXT: addi sp, sp, -16
-; RV64IFZFHMIN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64IFZFHMIN-NEXT: fcvt.s.h fa0, fa0
-; RV64IFZFHMIN-NEXT: call __extendsfdf2@plt
-; RV64IFZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64IFZFHMIN-NEXT: addi sp, sp, 16
-; RV64IFZFHMIN-NEXT: ret
+; RV64IFZFHMIN-NEXT: tail __extendsfdf2@plt
 ;
 ; RV32IDZFHMIN-LABEL: fcvt_d_h:
 ; RV32IDZFHMIN: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/libcall-tail-calls.ll b/llvm/test/CodeGen/RISCV/libcall-tail-calls.ll
--- a/llvm/test/CodeGen/RISCV/libcall-tail-calls.ll
+++ b/llvm/test/CodeGen/RISCV/libcall-tail-calls.ll
@@ -167,10 +167,9 @@
 ; RV32I-ILP32-NEXT: srli a0, a0, 16
 ; RV32I-ILP32-NEXT: call __extendhfsf2@plt
 ; RV32I-ILP32-NEXT: call sinf@plt
-; RV32I-ILP32-NEXT: call __truncsfhf2@plt
 ; RV32I-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32I-ILP32-NEXT: addi sp, sp, 16
-; RV32I-ILP32-NEXT: ret
+; RV32I-ILP32-NEXT: tail __truncsfhf2@plt
 ;
 ; RV64IFD-LP64D-LABEL: sin_f16:
 ; RV64IFD-LP64D: # %bb.0:
@@ -223,10 +222,9 @@
 ; RV64I-LP64-NEXT: srli a0, a0, 48
 ; RV64I-LP64-NEXT: call __extendhfsf2@plt
 ; RV64I-LP64-NEXT: call sinf@plt
-; RV64I-LP64-NEXT: call __truncsfhf2@plt
 ; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64I-LP64-NEXT: addi sp, sp, 16
-; RV64I-LP64-NEXT: ret
+; RV64I-LP64-NEXT: tail __truncsfhf2@plt
 %1 = call half @llvm.sin.f16(half %a)
 ret half %1
 }
@@ -236,23 +234,14 @@
 declare float @llvm.sin.f32(float)
 
 define float @sin_f32(float %a) nounwind {
+; RV32-ALL-LABEL: sin_f32:
+; RV32-ALL: # %bb.0:
+; RV32-ALL-NEXT: tail sinf@plt
+;
 ; F-ABI-ALL-LABEL: sin_f32:
 ; F-ABI-ALL: # %bb.0:
 ; F-ABI-ALL-NEXT: tail sinf@plt
 ;
-; RV32IFD-ILP32-LABEL: sin_f32:
-; RV32IFD-ILP32: # %bb.0:
-; RV32IFD-ILP32-NEXT: tail sinf@plt
-;
-; RV32I-ILP32-LABEL: sin_f32:
-; RV32I-ILP32: # %bb.0:
-; RV32I-ILP32-NEXT: addi sp, sp, -16
-; RV32I-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-ILP32-NEXT: call sinf@plt
-; RV32I-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-ILP32-NEXT: addi sp, sp, 16
-; RV32I-ILP32-NEXT: ret
-;
 ; RV64-LP64-ALL-LABEL: sin_f32:
 ; RV64-LP64-ALL: # %bb.0:
 ; RV64-LP64-ALL-NEXT: addi sp, sp, -16
@@ -268,26 +257,9 @@
 declare float @llvm.powi.f32.i32(float, i32)
 
 define float @powi_f32(float %a, i32 %b) nounwind {
-; RV32IFD-ILP32D-LABEL: powi_f32:
-; RV32IFD-ILP32D: # %bb.0:
-; RV32IFD-ILP32D-NEXT: tail __powisf2@plt
-;
-; RV32IF-ILP32F-LABEL: powi_f32:
-; RV32IF-ILP32F: # %bb.0:
-; RV32IF-ILP32F-NEXT: tail __powisf2@plt
-;
-; RV32IFD-ILP32-LABEL: powi_f32:
-; RV32IFD-ILP32: # %bb.0:
-; RV32IFD-ILP32-NEXT: tail __powisf2@plt
-;
-; RV32I-ILP32-LABEL: powi_f32:
-; RV32I-ILP32: # %bb.0:
-; RV32I-ILP32-NEXT: addi sp, sp, -16
-; RV32I-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32I-ILP32-NEXT: call __powisf2@plt
-; RV32I-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32I-ILP32-NEXT: addi sp, sp, 16
-; RV32I-ILP32-NEXT: ret
+; RV32-ALL-LABEL: powi_f32:
+; RV32-ALL: # %bb.0:
+; RV32-ALL-NEXT: tail __powisf2@plt
 ;
 ; RV64IFD-LP64D-LABEL: powi_f32:
 ; RV64IFD-LP64D: # %bb.0:
@@ -352,12 +324,7 @@
 ;
 ; RV64I-LP64-LABEL: llround_f32:
 ; RV64I-LP64: # %bb.0:
-; RV64I-LP64-NEXT: addi sp, sp, -16
-; RV64I-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-LP64-NEXT: call llroundf@plt
-; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-LP64-NEXT: addi sp, sp, 16
-; RV64I-LP64-NEXT: ret
+; RV64I-LP64-NEXT: tail llroundf@plt
 %1 = call i64 @llvm.llround.i64.f32(float %a)
 ret i64 %1
 }
@@ -389,27 +356,9 @@
 ; RV32-ILP32-ALL-NEXT: addi sp, sp, 16
 ; RV32-ILP32-ALL-NEXT: ret
 ;
-; RV64IF-LP64F-LABEL: sin_f64:
-; RV64IF-LP64F: # %bb.0:
-; RV64IF-LP64F-NEXT: addi sp, sp, -16
-; RV64IF-LP64F-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IF-LP64F-NEXT: call sin@plt
-; RV64IF-LP64F-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64IF-LP64F-NEXT: addi sp, sp, 16
-; RV64IF-LP64F-NEXT: ret
-;
-; RV64IFD-LP64-LABEL: sin_f64:
-; RV64IFD-LP64: # %bb.0:
-; RV64IFD-LP64-NEXT: tail sin@plt
-;
-; RV64I-LP64-LABEL: sin_f64:
-; RV64I-LP64: # %bb.0:
-; RV64I-LP64-NEXT: addi sp, sp, -16
-; RV64I-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-LP64-NEXT: call sin@plt
-; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-LP64-NEXT: addi sp, sp, 16
-; RV64I-LP64-NEXT: ret
+; RV64-ALL-LABEL: sin_f64:
+; RV64-ALL: # %bb.0:
+; RV64-ALL-NEXT: tail sin@plt
 %1 = call double @llvm.sin.f64(double %a)
 ret double %1
 }
@@ -451,23 +400,23 @@
 ;
 ; RV64IF-LP64F-LABEL: powi_f64:
 ; RV64IF-LP64F: # %bb.0:
-; RV64IF-LP64F-NEXT: addi sp, sp, -16
-; RV64IF-LP64F-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64IF-LP64F-NEXT: sext.w a1, a1
-; RV64IF-LP64F-NEXT: call __powidf2@plt
-; RV64IF-LP64F-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64IF-LP64F-NEXT: addi sp, sp, 16
-; RV64IF-LP64F-NEXT: ret
+; RV64IF-LP64F-NEXT: tail __powidf2@plt
 ;
-; RV64-LP64-ALL-LABEL: powi_f64:
-; RV64-LP64-ALL: # %bb.0:
-; RV64-LP64-ALL-NEXT: addi sp, sp, -16
-; RV64-LP64-ALL-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-LP64-ALL-NEXT: sext.w a1, a1
-; RV64-LP64-ALL-NEXT: call __powidf2@plt
-; RV64-LP64-ALL-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-LP64-ALL-NEXT: addi sp, sp, 16
-; RV64-LP64-ALL-NEXT: ret
+; RV64IFD-LP64-LABEL: powi_f64:
+; RV64IFD-LP64: # %bb.0:
+; RV64IFD-LP64-NEXT: addi sp, sp, -16
+; RV64IFD-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64IFD-LP64-NEXT: sext.w a1, a1
+; RV64IFD-LP64-NEXT: call __powidf2@plt
+; RV64IFD-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IFD-LP64-NEXT: addi sp, sp, 16
+; RV64IFD-LP64-NEXT: ret
+;
+; RV64I-LP64-LABEL: powi_f64:
+; RV64I-LP64: # %bb.0:
+; RV64I-LP64-NEXT: sext.w a1, a1
+; RV64I-LP64-NEXT: tail __powidf2@plt
 %1 = call double @llvm.powi.f64.i32(double %a, i32 %b)
 ret double %1
 }
@@ -491,12 +440,7 @@
 ;
 ; RV64IF-LP64F-LABEL: llround_f64:
 ; RV64IF-LP64F: # %bb.0:
-; RV64IF-LP64F-NEXT: addi sp, sp, -16
-; RV64IF-LP64F-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IF-LP64F-NEXT: call llround@plt
-; RV64IF-LP64F-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64IF-LP64F-NEXT: addi sp, sp, 16
-; RV64IF-LP64F-NEXT: ret
+; RV64IF-LP64F-NEXT: tail llround@plt
 ;
 ; RV64IFD-LP64-LABEL: llround_f64:
 ; RV64IFD-LP64: # %bb.0:
@@ -506,12 +450,7 @@
 ;
 ; RV64I-LP64-LABEL: llround_f64:
 ; RV64I-LP64: # %bb.0:
-; RV64I-LP64-NEXT: addi sp, sp, -16
-; RV64I-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-LP64-NEXT: call llround@plt
-; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64I-LP64-NEXT: addi sp, sp, 16
-; RV64I-LP64-NEXT: ret
+; RV64I-LP64-NEXT: tail llround@plt
 %1 = call i64 @llvm.llround.i64.f64(double %a)
 ret i64 %1
 }
diff --git a/llvm/test/CodeGen/RISCV/rv32i-rv64i-float-double.ll b/llvm/test/CodeGen/RISCV/rv32i-rv64i-float-double.ll
--- a/llvm/test/CodeGen/RISCV/rv32i-rv64i-float-double.ll
+++ b/llvm/test/CodeGen/RISCV/rv32i-rv64i-float-double.ll
@@ -20,11 +20,10 @@
 ; RV32IF-NEXT: mv s0, a1
 ; RV32IF-NEXT: call __addsf3@plt
 ; RV32IF-NEXT: mv a1, s0
-; RV32IF-NEXT: call __divsf3@plt
 ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT: addi sp, sp, 16
-; RV32IF-NEXT: ret
+; RV32IF-NEXT: tail __divsf3@plt
 ;
 ; RV64IF-LABEL: float_test:
 ; RV64IF: # %bb.0:
@@ -71,11 +70,10 @@
 ; RV64IF-NEXT: mv s0, a1
 ; RV64IF-NEXT: call __adddf3@plt
 ; RV64IF-NEXT: mv a1, s0
-; RV64IF-NEXT: call __divdf3@plt
 ; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64IF-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64IF-NEXT: addi sp, sp, 16
-; RV64IF-NEXT: ret
+; RV64IF-NEXT: tail __divdf3@plt
 %1 = fadd double %a, %b
 %2 = fdiv double %1, %b
 ret double %2