diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -181,6 +181,8 @@
   bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
                               SDValue C) const override;
 
+  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
+
 private:
   /// Target-specific function used to lower LoongArch calling conventions.
   typedef bool LoongArchCCAssignFn(const DataLayout &DL, LoongArchABI::ABI ABI,
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2330,6 +2330,39 @@
   return CI->isTailCall();
 }
 
+// Check if the return value is used as only a return value, as otherwise
+// we can't perform a tail-call.
+bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
+                                                 SDValue &Chain) const {
+  if (N->getNumValues() != 1)
+    return false;
+  if (!N->hasNUsesOfValue(1, 0))
+    return false;
+
+  SDNode *Copy = *N->use_begin();
+  if (Copy->getOpcode() != ISD::CopyToReg)
+    return false;
+
+  // If the ISD::CopyToReg has a glue operand, we conservatively assume it
+  // isn't safe to perform a tail call.
+  if (Copy->getGluedNode())
+    return false;
+
+  // The copy must be used by a LoongArchISD::RET, and nothing else.
+  bool HasRet = false;
+  for (SDNode *Node : Copy->uses()) {
+    if (Node->getOpcode() != LoongArchISD::RET)
+      return false;
+    HasRet = true;
+  }
+
+  if (!HasRet)
+    return false;
+
+  Chain = Copy->getOperand(0);
+  return true;
+}
+
 // Check whether the call is eligible for tail call optimization.
 bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
     CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
diff --git a/llvm/test/CodeGen/LoongArch/fp-expand.ll b/llvm/test/CodeGen/LoongArch/fp-expand.ll
--- a/llvm/test/CodeGen/LoongArch/fp-expand.ll
+++ b/llvm/test/CodeGen/LoongArch/fp-expand.ll
@@ -14,21 +14,11 @@
 define float @sin_f32(float %a) nounwind {
 ; LA32-LABEL: sin_f32:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    addi.w $sp, $sp, -16
-; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT:    bl %plt(sinf)
-; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 16
-; LA32-NEXT:    ret
+; LA32-NEXT:    b %plt(sinf)
 ;
 ; LA64-LABEL: sin_f32:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    addi.d $sp, $sp, -16
-; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64-NEXT:    bl %plt(sinf)
-; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:    addi.d $sp, $sp, 16
-; LA64-NEXT:    ret
+; LA64-NEXT:    b %plt(sinf)
   %1 = call float @llvm.sin.f32(float %a)
   ret float %1
 }
@@ -36,21 +26,11 @@
 define float @cos_f32(float %a) nounwind {
 ; LA32-LABEL: cos_f32:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    addi.w $sp, $sp, -16
-; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT:    bl %plt(cosf)
-; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 16
-; LA32-NEXT:    ret
+; LA32-NEXT:    b %plt(cosf)
 ;
 ; LA64-LABEL: cos_f32:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    addi.d $sp, $sp, -16
-; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64-NEXT:    bl %plt(cosf)
-; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:    addi.d $sp, $sp, 16
-; LA64-NEXT:    ret
+; LA64-NEXT:    b %plt(cosf)
   %1 = call float @llvm.cos.f32(float %a)
   ret float %1
 }
@@ -100,21 +80,11 @@
 define float @pow_f32(float %a, float %b) nounwind {
 ; LA32-LABEL: pow_f32:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    addi.w $sp, $sp, -16
-; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT:    bl %plt(powf)
-; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 16
-; LA32-NEXT:    ret
+; LA32-NEXT:    b %plt(powf)
 ;
 ; LA64-LABEL: pow_f32:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    addi.d $sp, $sp, -16
-; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64-NEXT:    bl %plt(powf)
-; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:    addi.d $sp, $sp, 16
-; LA64-NEXT:    ret
+; LA64-NEXT:    b %plt(powf)
   %1 = call float @llvm.pow.f32(float %a, float %b)
   ret float %1
 }
@@ -122,21 +92,11 @@
 define float @frem_f32(float %a, float %b) nounwind {
 ; LA32-LABEL: frem_f32:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    addi.w $sp, $sp, -16
-; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT:    bl %plt(fmodf)
-; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 16
-; LA32-NEXT:    ret
+; LA32-NEXT:    b %plt(fmodf)
 ;
 ; LA64-LABEL: frem_f32:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    addi.d $sp, $sp, -16
-; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64-NEXT:    bl %plt(fmodf)
-; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:    addi.d $sp, $sp, 16
-; LA64-NEXT:    ret
+; LA64-NEXT:    b %plt(fmodf)
   %1 = frem float %a, %b
   ret float %1
 }
@@ -144,21 +104,11 @@
 define double @sin_f64(double %a) nounwind {
 ; LA32-LABEL: sin_f64:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    addi.w $sp, $sp, -16
-; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT:    bl %plt(sin)
-; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 16
-; LA32-NEXT:    ret
+; LA32-NEXT:    b %plt(sin)
 ;
 ; LA64-LABEL: sin_f64:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    addi.d $sp, $sp, -16
-; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64-NEXT:    bl %plt(sin)
-; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:    addi.d $sp, $sp, 16
-; LA64-NEXT:    ret
+; LA64-NEXT:    b %plt(sin)
   %1 = call double @llvm.sin.f64(double %a)
   ret double %1
 }
@@ -166,21 +116,11 @@
 define double @cos_f64(double %a) nounwind {
 ; LA32-LABEL: cos_f64:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    addi.w $sp, $sp, -16
-; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT:    bl %plt(cos)
-; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 16
-; LA32-NEXT:    ret
+; LA32-NEXT:    b %plt(cos)
 ;
 ; LA64-LABEL: cos_f64:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    addi.d $sp, $sp, -16
-; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64-NEXT:    bl %plt(cos)
-; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:    addi.d $sp, $sp, 16
-; LA64-NEXT:    ret
+; LA64-NEXT:    b %plt(cos)
   %1 = call double @llvm.cos.f64(double %a)
   ret double %1
 }
@@ -230,21 +170,11 @@
 define double @pow_f64(double %a, double %b) nounwind {
 ; LA32-LABEL: pow_f64:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    addi.w $sp, $sp, -16
-; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT:    bl %plt(pow)
-; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 16
-; LA32-NEXT:    ret
+; LA32-NEXT:    b %plt(pow)
 ;
 ; LA64-LABEL: pow_f64:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    addi.d $sp, $sp, -16
-; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64-NEXT:    bl %plt(pow)
-; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:    addi.d $sp, $sp, 16
-; LA64-NEXT:    ret
+; LA64-NEXT:    b %plt(pow)
   %1 = call double @llvm.pow.f64(double %a, double %b)
   ret double %1
 }
@@ -252,21 +182,11 @@
 define double @frem_f64(double %a, double %b) nounwind {
 ; LA32-LABEL: frem_f64:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    addi.w $sp, $sp, -16
-; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT:    bl %plt(fmod)
-; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 16
-; LA32-NEXT:    ret
+; LA32-NEXT:    b %plt(fmod)
 ;
 ; LA64-LABEL: frem_f64:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    addi.d $sp, $sp, -16
-; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64-NEXT:    bl %plt(fmod)
-; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:    addi.d $sp, $sp, 16
-; LA64-NEXT:    ret
+; LA64-NEXT:    b %plt(fmod)
   %1 = frem double %a, %b
   ret double %1
 }
diff --git a/llvm/test/CodeGen/LoongArch/frint.ll b/llvm/test/CodeGen/LoongArch/frint.ll
--- a/llvm/test/CodeGen/LoongArch/frint.ll
+++ b/llvm/test/CodeGen/LoongArch/frint.ll
@@ -7,21 +7,11 @@
 define float @rint_f32(float %f) nounwind {
 ; LA32F-LABEL: rint_f32:
 ; LA32F:       # %bb.0: # %entry
-; LA32F-NEXT:    addi.w $sp, $sp, -16
-; LA32F-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32F-NEXT:    bl %plt(rintf)
-; LA32F-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32F-NEXT:    addi.w $sp, $sp, 16
-; LA32F-NEXT:    ret
+; LA32F-NEXT:    b %plt(rintf)
 ;
 ; LA32D-LABEL: rint_f32:
 ; LA32D:       # %bb.0: # %entry
-; LA32D-NEXT:    addi.w $sp, $sp, -16
-; LA32D-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32D-NEXT:    bl %plt(rintf)
-; LA32D-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32D-NEXT:    addi.w $sp, $sp, 16
-; LA32D-NEXT:    ret
+; LA32D-NEXT:    b %plt(rintf)
 ;
 ; LA64F-LABEL: rint_f32:
 ; LA64F:       # %bb.0: # %entry
@@ -51,12 +41,7 @@
 ;
 ; LA32D-LABEL: rint_f64:
 ; LA32D:       # %bb.0: # %entry
-; LA32D-NEXT:    addi.w $sp, $sp, -16
-; LA32D-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32D-NEXT:    bl %plt(rint)
-; LA32D-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32D-NEXT:    addi.w $sp, $sp, 16
-; LA32D-NEXT:    ret
+; LA32D-NEXT:    b %plt(rint)
 ;
 ; LA64F-LABEL: rint_f64:
 ; LA64F:       # %bb.0: # %entry
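As a quick way to exercise the new hook outside the updated tests, a standalone reproducer along the lines of the sketch below (a hypothetical file, not part of this patch) can be run through llc with the same RUN-line options the tests above use; with isUsedByReturnOnly in place, the fmodf libcall produced for the frem should come out as a single `b %plt(fmodf)` tail call instead of a `bl` wrapped in stack-frame setup and teardown, mirroring the frem_f32 case in fp-expand.ll.

; Hypothetical reproducer (not part of the patch): the libcall result's only
; use is the return value, so the expanded frem should become a tail call.
define float @frem_tail(float %a, float %b) nounwind {
  %r = frem float %a, %b
  ret float %r
}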