diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -176,6 +176,7 @@
 
   Register getRegisterByName(const char *RegName, LLT VT,
                              const MachineFunction &MF) const override;
+  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
   bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
                               SDValue C) const override;
 
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2326,6 +2326,10 @@
   return Chain;
 }
 
+bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
+  return CI->isTailCall();
+}
+
 // Check whether the call is eligible for tail call optimization.
 bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
     CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
diff --git a/llvm/test/CodeGen/LoongArch/bnez-beqz.ll b/llvm/test/CodeGen/LoongArch/bnez-beqz.ll
--- a/llvm/test/CodeGen/LoongArch/bnez-beqz.ll
+++ b/llvm/test/CodeGen/LoongArch/bnez-beqz.ll
@@ -7,27 +7,19 @@
 define void @bnez_i32(i32 signext %0) nounwind {
 ; LA32-LABEL: bnez_i32:
 ; LA32:       # %bb.0: # %start
-; LA32-NEXT:    addi.w $sp, $sp, -16
-; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
-; LA32-NEXT:    bnez $a0, .LBB0_2
-; LA32-NEXT:  # %bb.1: # %t
-; LA32-NEXT:    bl %plt(bar)
-; LA32-NEXT:  .LBB0_2: # %f
-; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 16
+; LA32-NEXT:    beqz $a0, .LBB0_2
+; LA32-NEXT:  # %bb.1: # %f
 ; LA32-NEXT:    ret
+; LA32-NEXT:  .LBB0_2: # %t
+; LA32-NEXT:    b %plt(bar)
 ;
 ; LA64-LABEL: bnez_i32:
 ; LA64:       # %bb.0: # %start
-; LA64-NEXT:    addi.d $sp, $sp, -16
-; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64-NEXT:    bnez $a0, .LBB0_2
-; LA64-NEXT:  # %bb.1: # %t
-; LA64-NEXT:    bl %plt(bar)
-; LA64-NEXT:  .LBB0_2: # %f
-; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:    addi.d $sp, $sp, 16
+; LA64-NEXT:    beqz $a0, .LBB0_2
+; LA64-NEXT:  # %bb.1: # %f
 ; LA64-NEXT:    ret
+; LA64-NEXT:  .LBB0_2: # %t
+; LA64-NEXT:    b %plt(bar)
 start:
   %1 = icmp eq i32 %0, 0
   br i1 %1, label %t, label %f
@@ -43,26 +35,18 @@
 define void @beqz_i32(i32 signext %0) nounwind {
 ; LA32-LABEL: beqz_i32:
 ; LA32:       # %bb.0: # %start
-; LA32-NEXT:    addi.w $sp, $sp, -16
-; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
 ; LA32-NEXT:    beqz $a0, .LBB1_2
 ; LA32-NEXT:  # %bb.1: # %t
-; LA32-NEXT:    bl %plt(bar)
+; LA32-NEXT:    b %plt(bar)
 ; LA32-NEXT:  .LBB1_2: # %f
-; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 16
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: beqz_i32:
 ; LA64:       # %bb.0: # %start
-; LA64-NEXT:    addi.d $sp, $sp, -16
-; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
 ; LA64-NEXT:    beqz $a0, .LBB1_2
 ; LA64-NEXT:  # %bb.1: # %t
-; LA64-NEXT:    bl %plt(bar)
+; LA64-NEXT:    b %plt(bar)
 ; LA64-NEXT:  .LBB1_2: # %f
-; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:    addi.d $sp, $sp, 16
 ; LA64-NEXT:    ret
 start:
   %1 = icmp ne i32 %0, 0
@@ -79,28 +63,20 @@
 define void @bnez_i64(i64 %0) nounwind {
 ; LA32-LABEL: bnez_i64:
 ; LA32:       # %bb.0: # %start
-; LA32-NEXT:    addi.w $sp, $sp, -16
-; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
 ; LA32-NEXT:    or $a0, $a0, $a1
-; LA32-NEXT:    bnez $a0, .LBB2_2
-; LA32-NEXT:  # %bb.1: # %t
-; LA32-NEXT:    bl %plt(bar)
-; LA32-NEXT:  .LBB2_2: # %f
-; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 16
+; LA32-NEXT:    beqz $a0, .LBB2_2
+; LA32-NEXT:  # %bb.1: # %f
 ; LA32-NEXT:    ret
+; LA32-NEXT:  .LBB2_2: # %t
+; LA32-NEXT:    b %plt(bar)
 ;
 ; LA64-LABEL: bnez_i64:
 ; LA64:       # %bb.0: # %start
-; LA64-NEXT:    addi.d $sp, $sp, -16
-; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
-; LA64-NEXT:    bnez $a0, .LBB2_2
-; LA64-NEXT:  # %bb.1: # %t
-; LA64-NEXT:    bl %plt(bar)
-; LA64-NEXT:  .LBB2_2: # %f
-; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:    addi.d $sp, $sp, 16
+; LA64-NEXT:    beqz $a0, .LBB2_2
+; LA64-NEXT:  # %bb.1: # %f
 ; LA64-NEXT:    ret
+; LA64-NEXT:  .LBB2_2: # %t
+; LA64-NEXT:    b %plt(bar)
 start:
   %1 = icmp eq i64 %0, 0
   br i1 %1, label %t, label %f
@@ -116,27 +92,19 @@
 define void @beqz_i64(i64 %0) nounwind {
 ; LA32-LABEL: beqz_i64:
 ; LA32:       # %bb.0: # %start
-; LA32-NEXT:    addi.w $sp, $sp, -16
-; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
 ; LA32-NEXT:    or $a0, $a0, $a1
 ; LA32-NEXT:    beqz $a0, .LBB3_2
 ; LA32-NEXT:  # %bb.1: # %t
-; LA32-NEXT:    bl %plt(bar)
+; LA32-NEXT:    b %plt(bar)
 ; LA32-NEXT:  .LBB3_2: # %f
-; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 16
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: beqz_i64:
 ; LA64:       # %bb.0: # %start
-; LA64-NEXT:    addi.d $sp, $sp, -16
-; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
 ; LA64-NEXT:    beqz $a0, .LBB3_2
 ; LA64-NEXT:  # %bb.1: # %t
-; LA64-NEXT:    bl %plt(bar)
+; LA64-NEXT:    b %plt(bar)
 ; LA64-NEXT:  .LBB3_2: # %f
-; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:    addi.d $sp, $sp, 16
 ; LA64-NEXT:    ret
 start:
   %1 = icmp ne i64 %0, 0
diff --git a/llvm/test/CodeGen/LoongArch/duplicate-returns-for-tailcall.ll b/llvm/test/CodeGen/LoongArch/duplicate-returns-for-tailcall.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/duplicate-returns-for-tailcall.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s
+
+;; Perform tail call optimization for duplicate returns.
+declare i32 @test()
+declare i32 @test1()
+declare i32 @test2()
+declare i32 @test3()
+define i32 @duplicate_returns(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: duplicate_returns:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    bstrpick.d $a2, $a0, 31, 0
+; CHECK-NEXT:    beqz $a2, .LBB0_4
+; CHECK-NEXT:  # %bb.1: # %if.else
+; CHECK-NEXT:    bstrpick.d $a2, $a1, 31, 0
+; CHECK-NEXT:    beqz $a2, .LBB0_5
+; CHECK-NEXT:  # %bb.2: # %if.else2
+; CHECK-NEXT:    addi.w $a0, $a0, 0
+; CHECK-NEXT:    addi.w $a1, $a1, 0
+; CHECK-NEXT:    bge $a1, $a0, .LBB0_6
+; CHECK-NEXT:  # %bb.3: # %if.then3
+; CHECK-NEXT:    b %plt(test2)
+; CHECK-NEXT:  .LBB0_4: # %if.then
+; CHECK-NEXT:    b %plt(test)
+; CHECK-NEXT:  .LBB0_5: # %if.then2
+; CHECK-NEXT:    b %plt(test1)
+; CHECK-NEXT:  .LBB0_6: # %if.else3
+; CHECK-NEXT:    b %plt(test3)
+entry:
+  %cmp = icmp eq i32 %a, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  %call = tail call i32 @test()
+  br label %return
+
+if.else:
+  %cmp1 = icmp eq i32 %b, 0
+  br i1 %cmp1, label %if.then2, label %if.else2
+
+if.then2:
+  %call1 = tail call i32 @test1()
+  br label %return
+
+if.else2:
+  %cmp5 = icmp sgt i32 %a, %b
+  br i1 %cmp5, label %if.then3, label %if.else3
+
+if.then3:
+  %call2 = tail call i32 @test2()
+  br label %return
+
+if.else3:
+  %call3 = tail call i32 @test3()
+  br label %return
+
+return:
+  %retval = phi i32 [ %call, %if.then ], [ %call1, %if.then2 ], [ %call2, %if.then3 ], [ %call3, %if.else3 ]
+  ret i32 %retval
+}