Index: lib/Target/Sparc/DelaySlotFiller.cpp =================================================================== --- lib/Target/Sparc/DelaySlotFiller.cpp +++ lib/Target/Sparc/DelaySlotFiller.cpp @@ -176,17 +176,20 @@ if (slot == MBB.begin()) return MBB.end(); - if (slot->getOpcode() == SP::RET || slot->getOpcode() == SP::TLS_CALL) + unsigned Opc = slot->getOpcode(); + + if (Opc == SP::RET || Opc == SP::TLS_CALL) return MBB.end(); - if (slot->getOpcode() == SP::RETL) { + if (Opc == SP::RETL || Opc == SP::TAIL_CALL || Opc == SP::TAIL_CALLrr) { MachineBasicBlock::iterator J = slot; --J; if (J->getOpcode() == SP::RESTORErr || J->getOpcode() == SP::RESTOREri) { // change retl to ret. - slot->setDesc(Subtarget->getInstrInfo()->get(SP::RET)); + if (Opc == SP::RETL) + slot->setDesc(Subtarget->getInstrInfo()->get(SP::RET)); return J; } } @@ -363,6 +366,8 @@ case SP::CALLrr: case SP::CALLri: structSizeOpNum = 2; break; case SP::TLS_CALL: return false; + case SP::TAIL_CALLrr: + case SP::TAIL_CALL: return false; } const MachineOperand &MO = I->getOperand(structSizeOpNum); Index: lib/Target/Sparc/SparcCallingConv.td =================================================================== --- lib/Target/Sparc/SparcCallingConv.td +++ lib/Target/Sparc/SparcCallingConv.td @@ -135,7 +135,7 @@ // Callee-saved registers are handled by the register window mechanism. def CSR : CalleeSavedRegs<(add)> { let OtherPreserved = (add (sequence "I%u", 0, 7), - (sequence "L%u", 0, 7)); + (sequence "L%u", 0, 7), O6); } // Callee-saved registers for calls with ReturnsTwice attribute. Index: lib/Target/Sparc/SparcFrameLowering.cpp =================================================================== --- lib/Target/Sparc/SparcFrameLowering.cpp +++ lib/Target/Sparc/SparcFrameLowering.cpp @@ -224,8 +224,9 @@ const SparcInstrInfo &TII = *static_cast(MF.getSubtarget().getInstrInfo()); DebugLoc dl = MBBI->getDebugLoc(); - assert(MBBI->getOpcode() == SP::RETL && - "Can only put epilog before 'retl' instruction!"); + assert((MBBI->getOpcode() == SP::RETL || MBBI->getOpcode() == SP::TAIL_CALL || + MBBI->getOpcode() == SP::TAIL_CALLrr) && + "Can only put epilog before 'retl' or 'tail_call' instruction!"); if (!FuncInfo->isLeafProc()) { BuildMI(MBB, MBBI, dl, TII.get(SP::RESTORErr), SP::G0).addReg(SP::G0) .addReg(SP::G0); @@ -234,10 +235,19 @@ MachineFrameInfo &MFI = MF.getFrameInfo(); int NumBytes = (int) MFI.getStackSize(); - if (NumBytes == 0) - return; - - emitSPAdjustment(MF, MBB, MBBI, NumBytes, SP::ADDrr, SP::ADDri); + if (NumBytes != 0) + emitSPAdjustment(MF, MBB, MBBI, NumBytes, SP::ADDrr, SP::ADDri); + + // Preserve return address in %o7 + if (MBBI->getOpcode() == SP::TAIL_CALL) { + MBB.addLiveIn(SP::O7); + BuildMI(MBB, MBBI, dl, TII.get(SP::ORrr), SP::G1) + .addReg(SP::G0) + .addReg(SP::O7); + BuildMI(MBB, MBBI, dl, TII.get(SP::ORrr), SP::O7) + .addReg(SP::G0) + .addReg(SP::G1); + } } bool SparcFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { Index: lib/Target/Sparc/SparcISelLowering.h =================================================================== --- lib/Target/Sparc/SparcISelLowering.h +++ lib/Target/Sparc/SparcISelLowering.h @@ -48,6 +48,8 @@ GLOBAL_BASE_REG, // Global base reg for PIC. FLUSHW, // FLUSH register windows to stack. + TAIL_CALL, // Tail call + TLS_ADD, // For Thread Local Storage (TLS). TLS_LD, TLS_CALL @@ -191,6 +193,10 @@ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + bool IsEligibleForTailCallOptimization(CCState &CCInfo, + CallLoweringInfo &CLI, + MachineFunction &MF) const; + bool ShouldShrinkFPConstant(EVT VT) const override { // Do not shrink FP constpool if VT == MVT::f128. // (ldd, call _Q_fdtoq) is more expensive than two ldds. Index: lib/Target/Sparc/SparcISelLowering.cpp =================================================================== --- lib/Target/Sparc/SparcISelLowering.cpp +++ lib/Target/Sparc/SparcISelLowering.cpp @@ -712,6 +712,31 @@ return CalleeFn->hasFnAttribute(Attribute::ReturnsTwice); } +/// IsEligibleForTailCallOptimization - Check whether the call is eligible +/// for tail call optimization. +bool SparcTargetLowering::IsEligibleForTailCallOptimization( + CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF) const { + + auto &Outs = CLI.Outs; + auto &Caller = MF.getFunction(); + + // Do not tail call opt functions with "disable-tail-calls" attribute. + if (Caller.getFnAttribute("disable-tail-calls").getValueAsString() == "true") + return false; + + // Do not tail call opt if the stack is used to pass parameters. + if (CCInfo.getNextStackOffset() != 0) + return false; + + // Byval parameters hand the function a pointer directly into the stack area + // we want to reuse during a tail call. + for (auto &Arg : Outs) + if (Arg.Flags.isByVal()) + return false; + + return true; +} + // Lower a call for the 32-bit ABI. SDValue SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI, @@ -727,15 +752,15 @@ CallingConv::ID CallConv = CLI.CallConv; bool isVarArg = CLI.IsVarArg; - // Sparc target does not yet support tail call optimization. - isTailCall = false; - // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext()); CCInfo.AnalyzeCallOperands(Outs, CC_Sparc32); + isTailCall = isTailCall && IsEligibleForTailCallOptimization( + CCInfo, CLI, DAG.getMachineFunction()); + // Get the size of the outgoing arguments stack space requirement. unsigned ArgsSize = CCInfo.getNextStackOffset(); @@ -773,7 +798,10 @@ } } - Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, dl); + assert(!isTailCall || ArgsSize == 0); + + if (!isTailCall) + Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, dl); SmallVector, 8> RegsToPass; SmallVector MemOpChains; @@ -818,6 +846,10 @@ if (Flags.isSRet()) { assert(VA.needsCustom()); + + if (isTailCall) + continue; + // store SRet argument in %sp+64 SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32); SDValue PtrOff = DAG.getIntPtrConstant(64, dl); @@ -933,7 +965,9 @@ // stuck together. SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - unsigned Reg = toCallerWindow(RegsToPass[i].first); + unsigned Reg = RegsToPass[i].first; + if (!isTailCall) + Reg = toCallerWindow(Reg); Chain = DAG.getCopyToReg(Chain, dl, Reg, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } @@ -956,9 +990,12 @@ Ops.push_back(Callee); if (hasStructRetAttr) Ops.push_back(DAG.getTargetConstant(SRetArgSize, dl, MVT::i32)); - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) - Ops.push_back(DAG.getRegister(toCallerWindow(RegsToPass[i].first), - RegsToPass[i].second.getValueType())); + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + unsigned Reg = RegsToPass[i].first; + if (!isTailCall) + Reg = toCallerWindow(Reg); + Ops.push_back(DAG.getRegister(Reg, RegsToPass[i].second.getValueType())); + } // Add a register mask operand representing the call-preserved registers. const SparcRegisterInfo *TRI = Subtarget->getRegisterInfo(); @@ -972,6 +1009,11 @@ if (InFlag.getNode()) Ops.push_back(InFlag); + if (isTailCall) { + DAG.getMachineFunction().getFrameInfo().setHasTailCall(); + return DAG.getNode(SPISD::TAIL_CALL, dl, MVT::Other, Ops); + } + Chain = DAG.getNode(SPISD::CALL, dl, NodeTys, Ops); InFlag = Chain.getValue(1); @@ -1838,6 +1880,7 @@ case SPISD::TLS_ADD: return "SPISD::TLS_ADD"; case SPISD::TLS_LD: return "SPISD::TLS_LD"; case SPISD::TLS_CALL: return "SPISD::TLS_CALL"; + case SPISD::TAIL_CALL: return "SPISD::TAIL_CALL"; } return nullptr; } Index: lib/Target/Sparc/SparcInstrInfo.td =================================================================== --- lib/Target/Sparc/SparcInstrInfo.td +++ lib/Target/Sparc/SparcInstrInfo.td @@ -213,6 +213,10 @@ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +def tailcall : SDNode<"SPISD::TAIL_CALL", SDT_SPCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; + def SDT_SPRet : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; def retflag : SDNode<"SPISD::RET_FLAG", SDT_SPRet, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; @@ -1343,6 +1347,31 @@ } } +//===----------------------------------------------------------------------===// +// Instructions for tail calls. +//===----------------------------------------------------------------------===// +let isCodeGenOnly = 1, isReturn = 1, hasDelaySlot = 1, + isTerminator = 1, isBarrier = 1 in { + def TAIL_CALL : InstSP<(outs), (ins calltarget:$disp, variable_ops), + "call $disp", + [(tailcall tglobaladdr:$disp)]> { + bits<30> disp; + let op = 1; + let Inst{29-0} = disp; + } +} + +def : Pat<(tailcall (iPTR texternalsym:$dst)), + (TAIL_CALL texternalsym:$dst)>; + +let isCodeGenOnly = 1, isReturn = 1, hasDelaySlot = 1, isTerminator = 1, + isBarrier = 1, rd = 0 in { + def TAIL_CALLrr : F3_1<2, 0b111000, + (outs), (ins MEMrr:$ptr, variable_ops), + "jmp $ptr", + [(tailcall ADDRrr:$ptr)]>; +} + //===----------------------------------------------------------------------===// // V9 Instructions //===----------------------------------------------------------------------===// Index: test/CodeGen/SPARC/2011-01-11-Call.ll =================================================================== --- test/CodeGen/SPARC/2011-01-11-Call.ll +++ test/CodeGen/SPARC/2011-01-11-Call.ll @@ -20,7 +20,7 @@ ; V9: ret ; V9-NEXT: restore -define void @test() nounwind { +define void @test() #0 { entry: %0 = tail call i32 (...) @foo() nounwind tail call void (...) @bar() nounwind @@ -31,13 +31,10 @@ declare void @bar(...) - ; V8-LABEL: test_tail_call_with_return -; V8: save %sp -; V8: call foo -; V8-NEXT: nop -; V8: ret -; V8-NEXT: restore %g0, %o0, %o0 +; V8: mov %o7, %g1 +; V8-NEXT: call foo +; V8-NEXT: mov %g1, %o7 ; V9-LABEL: test_tail_call_with_return ; V9: save %sp @@ -51,3 +48,5 @@ %0 = tail call i32 (...) @foo() nounwind ret i32 %0 } + +attributes #0 = { nounwind "disable-tail-calls"="true" } Index: test/CodeGen/SPARC/2011-01-19-DelaySlot.ll =================================================================== --- test/CodeGen/SPARC/2011-01-19-DelaySlot.ll +++ test/CodeGen/SPARC/2011-01-19-DelaySlot.ll @@ -3,7 +3,7 @@ target triple = "sparc-unknown-linux-gnu" -define i32 @test(i32 %a) nounwind { +define i32 @test(i32 %a) #0 { entry: ; CHECK: test ; CHECK: call bar @@ -14,7 +14,7 @@ ret i32 %0 } -define i32 @test_jmpl(i32 (i32, i32)* nocapture %f, i32 %a, i32 %b) nounwind { +define i32 @test_jmpl(i32 (i32, i32)* nocapture %f, i32 %a, i32 %b) #0 { entry: ; CHECK: test_jmpl ; CHECK: call @@ -53,7 +53,7 @@ ret i32 %a_addr.1.lcssa } -define i32 @test_inlineasm(i32 %a) nounwind { +define i32 @test_inlineasm(i32 %a) #0 { entry: ;CHECK-LABEL: test_inlineasm: ;CHECK: cmp @@ -79,7 +79,7 @@ declare i32 @bar(i32) -define i32 @test_implicit_def() nounwind { +define i32 @test_implicit_def() #0 { entry: ;UNOPT-LABEL: test_implicit_def: ;UNOPT: call func @@ -88,7 +88,7 @@ ret i32 0 } -define i32 @prevent_o7_in_call_delay_slot(i32 %i0) { +define i32 @prevent_o7_in_call_delay_slot(i32 %i0) #0 { entry: ;CHECK-LABEL: prevent_o7_in_call_delay_slot: ;CHECK: add %i0, 2, %o5 @@ -128,7 +128,7 @@ ret i32 %1 } -define i32 @restore_or(i32 %a) { +define i32 @restore_or(i32 %a) #0 { entry: ;CHECK-LABEL: restore_or: ;CHECK: ret @@ -184,3 +184,4 @@ ret i32 %2 } +attributes #0 = { nounwind "disable-tail-calls"="true" } Index: test/CodeGen/SPARC/tailcall.ll =================================================================== --- /dev/null +++ test/CodeGen/SPARC/tailcall.ll @@ -0,0 +1,143 @@ +; RUN: llc < %s -mtriple=sparc -verify-machineinstrs | FileCheck %s + +; CHECK-LABEL: simple_leaf +; CHECK: mov %o7, %g1 +; CHECK: call foo +; CHECK: mov %g1, %o7 + +define i32 @simple_leaf(i32 %i) #0 { +entry: + %call = tail call i32 @foo(i32 %i) + ret i32 %call +} + +; CHECK-LABEL: simple_standard +; CHECK: save %sp, -96, %sp +; CHECK: call foo +; CHECK: restore + +define i32 @simple_standard(i32 %i) #1 { +entry: + %call = tail call i32 @foo(i32 %i) + ret i32 %call +} + +; CHECK-LABEL: extra_arg_leaf +; CHECK: mov 12, %o1 +; CHECK: mov %o7, %g1 +; CHECK: call foo2 +; CHECK: mov %g1, %o7 + +define i32 @extra_arg_leaf(i32 %i) #0 { +entry: + %call = tail call i32 @foo2(i32 %i, i32 12) + ret i32 %call +} + +; CHECK-LABEL: extra_arg_standard +; CHECK: save %sp, -96, %sp +; CHECK: call foo2 +; CHECK: restore %g0, 12, %o1 + +define i32 @extra_arg_standard(i32 %i) #1 { +entry: + %call = tail call i32 @foo2(i32 %i, i32 12) + ret i32 %call +} + +; Perform tail call optimization for external symbol. + +; CHECK-LABEL: caller_extern +; CHECK: mov %o7, %g1 +; CHECK: call memcpy +; CHECK: mov %g1, %o7 + +define void @caller_extern(i8* %src) optsize #0 { +entry: + tail call void @llvm.memcpy.p0i8.p0i8.i32( + i8* getelementptr inbounds ([2 x i8], + [2 x i8]* @dest, i32 0, i32 0), + i8* %src, i32 7, i1 false) + ret void +} + +; Perform tail call optimization for function pointer. + +; CHECK-LABEL: func_ptr_test +; CHECK: jmp %o0 +; CHECK: nop + +define i32 @func_ptr_test(i32 ()* nocapture %func_ptr) #0 { +entry: + %call = tail call i32 %func_ptr() #1 + ret i32 %call +} + +; CHECK-LABEL: func_ptr_test2 +; CHECK: save %sp, -96, %sp +; CHECK: mov 10, %i3 +; CHECK: mov %i0, %i4 +; CHECK: mov %i1, %i0 +; CHECK: jmp %i4 +; CHECK: restore %g0, %i3, %o1 +define i32 @func_ptr_test2(i32 (i32, i32, i32)* nocapture %func_ptr, + i32 %r, i32 %q) #1 { +entry: + %call = tail call i32 %func_ptr(i32 %r, i32 10, i32 %q) #1 + ret i32 %call +} + + +; Do not tail call optimize if stack is used to pass parameters. + +; CHECK-LABEL: caller_args +; CHECK: ret + +define i32 @caller_args() #0 { +entry: + %r = tail call i32 @foo7(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6) + ret i32 %r +} + +; Byval parameters hand the function a pointer directly into the stack area +; we want to reuse during a tail call. Do not tail call optimize functions with +; byval parameters. + +; CHECK-LABEL: caller_byval +; CHECK: ret + +define i32 @caller_byval() #0 { +entry: + %a = alloca i32* + %r = tail call i32 @callee_byval(i32** byval %a) + ret i32 %r +} + +; Perform tail call optimization for sret function. + +; CHECK-LABEL: sret_test +; CHECK: mov %o7, %g1 +; CHECK: call sret_func +; CHECK: mov %g1, %o7 + +define void @sret_test(%struct.a* noalias sret %agg.result) #0 { +entry: + tail call void bitcast (void (%struct.a*)* @sret_func to + void (%struct.a*)*)(%struct.a* sret %agg.result) + ret void +} + +%struct.a = type { i32, i32 } +@dest = global [2 x i8] zeroinitializer + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1) +declare void @sret_func(%struct.a* sret) +declare i32 @callee_byval(i32** byval %a) +declare i32 @foo(i32) +declare i32 @foo2(i32, i32) +declare i32 @foo7(i32, i32, i32, i32, i32, i32, i32) + +attributes #0 = { nounwind "disable-tail-calls"="false" + "no-frame-pointer-elim"="false" } +attributes #1 = { nounwind "disable-tail-calls"="false" + "no-frame-pointer-elim"="true" }