diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -706,7 +706,9 @@
 
   bool isEligibleForTailCallOptimization(
       CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
-      const SmallVector<CCValAssign, 16> &ArgLocs) const;
+      const SmallVector<CCValAssign, 16> &ArgLocs,
+      const SmallVectorImpl<SDValue> &OutVals,
+      const SmallVectorImpl<ISD::InputArg> &Ins) const;
 
   /// Generate error diagnostics if any register used by CC has been marked
   /// reserved.
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -11265,12 +11265,44 @@
   return Chain;
 }
 
+/// Return true if the outgoing stack argument \p Arg is a plain reload of one
+/// of the caller's own incoming stack arguments that already sits at
+/// \p Offset with the same size, so a tail call can leave the slot as is.
+/// \p MRI and \p TII are not consulted by the current implementation.
+static bool MatchingStackOffset(SDValue Arg, unsigned Offset,
+                                ISD::ArgFlagsTy Flags, MachineFrameInfo &MFI,
+                                const MachineRegisterInfo *MRI,
+                                const TargetInstrInfo *TII) {
+  // Only a value loaded straight from a frame slot can already be in place.
+  const auto *Ld = dyn_cast<LoadSDNode>(Arg);
+  if (!Ld)
+    return false;
+
+  // A byval argument is passed in as a pointer but is being dereferenced
+  // for the outgoing call, e.g.
+  //   define @foo(%struct.X* %A) {
+  //     tail call @bar(%struct.X* byval %A)
+  //   }
+  if (Flags.isByVal())
+    return false;
+
+  const auto *FINode = dyn_cast<FrameIndexSDNode>(Ld->getBasePtr());
+  if (!FINode || !MFI.isFixedObjectIndex(FINode->getIndex()))
+    return false;
+
+  // The fixed slot must coincide exactly with the outgoing location.
+  const int FI = FINode->getIndex();
+  const unsigned Bytes = Arg.getValueSizeInBits() / 8;
+  return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
+}
+
 /// isEligibleForTailCallOptimization - Check whether the call is eligible
 /// for tail call optimization.
 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
bool RISCVTargetLowering::isEligibleForTailCallOptimization( CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, - const SmallVector &ArgLocs) const { + const SmallVector &ArgLocs, const SmallVectorImpl &OutVals, + const SmallVectorImpl &Ins) const { auto &Callee = CLI.Callee; auto CalleeCC = CLI.CallConv; @@ -11286,9 +11318,24 @@ if (Caller.hasFnAttribute("interrupt")) return false; - // Do not tail call opt if the stack is used to pass parameters. - if (CCInfo.getNextStackOffset() != 0) - return false; + if (CCInfo.getNextStackOffset() != 0) { + MachineFrameInfo &MFI = MF.getFrameInfo(); + const MachineRegisterInfo *MRI = &MF.getRegInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + for (unsigned I = 0, RealArgIdx = 0, E = ArgLocs.size(); + I != E; ++I, ++RealArgIdx) { + const CCValAssign &VA = ArgLocs[I]; + SDValue Arg = OutVals[RealArgIdx]; + ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags; + if (VA.getLocInfo() == CCValAssign::Indirect) + return false; + else if (!VA.isRegLoc()) { + if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, MFI, MRI, TII)) + return false; + } + } + + } // Do not tail call opt if any parameters need to be passed indirectly. // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are @@ -11375,7 +11422,7 @@ // Check if it's really possible to do a tail call. if (IsTailCall) - IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs); + IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs, OutVals, Ins); if (IsTailCall) ++NumTailCalls; @@ -11511,10 +11558,8 @@ if (VA.isRegLoc()) { // Queue up the argument copies and emit them at the end. RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); - } else { + } else if (!IsTailCall) { assert(VA.isMemLoc() && "Argument not register or memory"); - assert(!IsTailCall && "Tail call not allowed if stack is used " - "for passing parameters"); // Work out the address of the stack slot. 
if (!StackPtr.getNode()) diff --git a/llvm/test/CodeGen/RISCV/tail-calls.ll b/llvm/test/CodeGen/RISCV/tail-calls.ll --- a/llvm/test/CodeGen/RISCV/tail-calls.ll +++ b/llvm/test/CodeGen/RISCV/tail-calls.ll @@ -113,29 +113,11 @@ ret void } -; Do not tail call optimize if stack is used to pass parameters. declare i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n) define i32 @caller_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n) nounwind { ; CHECK-LABEL: caller_args: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -32 -; CHECK-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; CHECK-NEXT: lw t0, 32(sp) -; CHECK-NEXT: lw t1, 36(sp) -; CHECK-NEXT: lw t2, 40(sp) -; CHECK-NEXT: lw t3, 44(sp) -; CHECK-NEXT: lw t4, 48(sp) -; CHECK-NEXT: lw t5, 52(sp) -; CHECK-NEXT: sw t5, 20(sp) -; CHECK-NEXT: sw t4, 16(sp) -; CHECK-NEXT: sw t3, 12(sp) -; CHECK-NEXT: sw t2, 8(sp) -; CHECK-NEXT: sw t1, 4(sp) -; CHECK-NEXT: sw t0, 0(sp) -; CHECK-NEXT: call callee_args@plt -; CHECK-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 32 -; CHECK-NEXT: ret +; CHECK-NEXT: tail callee_args@plt entry: %r = tail call i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n) ret i32 %r