Index: llvm/lib/Target/Sparc/SparcISelLowering.cpp =================================================================== --- llvm/lib/Target/Sparc/SparcISelLowering.cpp +++ llvm/lib/Target/Sparc/SparcISelLowering.cpp @@ -770,7 +770,10 @@ return false; // Do not tail call opt if the stack is used to pass parameters. - if (CCInfo.getNextStackOffset() != 0) + // 64-bit targets have a slightly higher limit since the ABI requires + // allocating some space even when all the parameters fit inside registers. + unsigned StackOffsetLimit = Subtarget->is64Bit() ? 48 : 0; + if (CCInfo.getNextStackOffset() > StackOffsetLimit) return false; // Do not tail call opt if either the callee or caller returns @@ -1189,20 +1192,21 @@ SDValue Chain = CLI.Chain; auto PtrVT = getPointerTy(DAG.getDataLayout()); - // Sparc target does not yet support tail call optimization. - CLI.IsTailCall = false; - // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext()); CCInfo.AnalyzeCallOperands(CLI.Outs, CC_Sparc64); + CLI.IsTailCall = CLI.IsTailCall && IsEligibleForTailCallOptimization( + CCInfo, CLI, DAG.getMachineFunction()); + // Get the size of the outgoing arguments stack space requirement. // The stack offset computed by CC_Sparc64 includes all arguments. // Called functions expect 6 argument words to exist in the stack frame, used // or not. - unsigned ArgsSize = std::max(6*8u, CCInfo.getNextStackOffset()); + unsigned StackReserved = 6 * 8u; + unsigned ArgsSize = std::max(StackReserved, CCInfo.getNextStackOffset()); // Keep stack frames 16-byte aligned. ArgsSize = alignTo(ArgsSize, 16); @@ -1211,10 +1215,13 @@ if (CLI.IsVarArg) fixupVariableFloatArgs(ArgLocs, CLI.Outs); + assert(!CLI.IsTailCall || ArgsSize == StackReserved); + // Adjust the stack pointer to make room for the arguments. 
// FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls // with more than 6 arguments. - Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL); + if (!CLI.IsTailCall) + Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL); // Collect the set of registers to pass to the function and their values. // This will be emitted as a sequence of CopyToReg nodes glued to the call @@ -1274,10 +1281,16 @@ DAG.getLoad(MVT::i64, DL, Store, HiPtrOff, MachinePointerInfo()); SDValue Lo64 = DAG.getLoad(MVT::i64, DL, Store, LoPtrOff, MachinePointerInfo()); - RegsToPass.push_back(std::make_pair(toCallerWindow(VA.getLocReg()), - Hi64)); - RegsToPass.push_back(std::make_pair(toCallerWindow(VA.getLocReg()+1), - Lo64)); + + Register HiReg = VA.getLocReg(); + Register LoReg = VA.getLocReg() + 1; + if (!CLI.IsTailCall) { + HiReg = toCallerWindow(HiReg); + LoReg = toCallerWindow(LoReg); + } + + RegsToPass.push_back(std::make_pair(HiReg, Hi64)); + RegsToPass.push_back(std::make_pair(LoReg, Lo64)); continue; } @@ -1298,7 +1311,11 @@ ++i; } } - RegsToPass.push_back(std::make_pair(toCallerWindow(VA.getLocReg()), Arg)); + + Register Reg = VA.getLocReg(); + if (!CLI.IsTailCall) + Reg = toCallerWindow(Reg); + RegsToPass.push_back(std::make_pair(Reg, Arg)); continue; } @@ -1366,6 +1383,10 @@ Ops.push_back(InGlue); // Now the call itself. 
+ if (CLI.IsTailCall) { + DAG.getMachineFunction().getFrameInfo().setHasTailCall(); + return DAG.getNode(SPISD::TAIL_CALL, DL, MVT::Other, Ops); + } SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); Chain = DAG.getNode(SPISD::CALL, DL, NodeTys, Ops); InGlue = Chain.getValue(1); Index: llvm/test/CodeGen/SPARC/2011-01-11-Call.ll =================================================================== --- llvm/test/CodeGen/SPARC/2011-01-11-Call.ll +++ llvm/test/CodeGen/SPARC/2011-01-11-Call.ll @@ -37,11 +37,9 @@ ; V8-NEXT: mov %g1, %o7 ; V9-LABEL: test_tail_call_with_return -; V9: save %sp -; V9: call foo -; V9-NEXT: nop -; V9: ret -; V9-NEXT: restore %g0, %o0, %o0 +; V9: mov %o7, %g1 +; V9-NEXT: call foo +; V9-NEXT: mov %g1, %o7 define i32 @test_tail_call_with_return() nounwind { entry: Index: llvm/test/CodeGen/SPARC/tailcall.ll =================================================================== --- llvm/test/CodeGen/SPARC/tailcall.ll +++ llvm/test/CodeGen/SPARC/tailcall.ll @@ -1,46 +1,72 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=sparc -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=sparc -verify-machineinstrs | FileCheck %s --check-prefix=V8 +; RUN: llc < %s -mtriple=sparcv9 -verify-machineinstrs | FileCheck %s --check-prefix=V9 define i32 @simple_leaf(i32 %i) #0 { -; CHECK-LABEL: simple_leaf: -; CHECK: ! %bb.0: ! %entry -; CHECK-NEXT: mov %o7, %g1 -; CHECK-NEXT: call foo -; CHECK-NEXT: mov %g1, %o7 +; V8-LABEL: simple_leaf: +; V8: ! %bb.0: ! %entry +; V8-NEXT: mov %o7, %g1 +; V8-NEXT: call foo +; V8-NEXT: mov %g1, %o7 +; +; V9-LABEL: simple_leaf: +; V9: ! %bb.0: ! %entry +; V9-NEXT: mov %o7, %g1 +; V9-NEXT: call foo +; V9-NEXT: mov %g1, %o7 entry: %call = tail call i32 @foo(i32 %i) ret i32 %call } define i32 @simple_standard(i32 %i) #1 { -; CHECK-LABEL: simple_standard: -; CHECK: ! %bb.0: ! 
%entry -; CHECK-NEXT: save %sp, -96, %sp -; CHECK-NEXT: call foo -; CHECK-NEXT: restore +; V8-LABEL: simple_standard: +; V8: ! %bb.0: ! %entry +; V8-NEXT: save %sp, -96, %sp +; V8-NEXT: call foo +; V8-NEXT: restore +; +; V9-LABEL: simple_standard: +; V9: ! %bb.0: ! %entry +; V9-NEXT: save %sp, -128, %sp +; V9-NEXT: call foo +; V9-NEXT: restore entry: %call = tail call i32 @foo(i32 %i) ret i32 %call } define i32 @extra_arg_leaf(i32 %i) #0 { -; CHECK-LABEL: extra_arg_leaf: -; CHECK: ! %bb.0: ! %entry -; CHECK-NEXT: mov 12, %o1 -; CHECK-NEXT: mov %o7, %g1 -; CHECK-NEXT: call foo2 -; CHECK-NEXT: mov %g1, %o7 +; V8-LABEL: extra_arg_leaf: +; V8: ! %bb.0: ! %entry +; V8-NEXT: mov 12, %o1 +; V8-NEXT: mov %o7, %g1 +; V8-NEXT: call foo2 +; V8-NEXT: mov %g1, %o7 +; +; V9-LABEL: extra_arg_leaf: +; V9: ! %bb.0: ! %entry +; V9-NEXT: mov 12, %o1 +; V9-NEXT: mov %o7, %g1 +; V9-NEXT: call foo2 +; V9-NEXT: mov %g1, %o7 entry: %call = tail call i32 @foo2(i32 %i, i32 12) ret i32 %call } define i32 @extra_arg_standard(i32 %i) #1 { -; CHECK-LABEL: extra_arg_standard: -; CHECK: ! %bb.0: ! %entry -; CHECK-NEXT: save %sp, -96, %sp -; CHECK-NEXT: call foo2 -; CHECK-NEXT: restore %g0, 12, %o1 +; V8-LABEL: extra_arg_standard: +; V8: ! %bb.0: ! %entry +; V8-NEXT: save %sp, -96, %sp +; V8-NEXT: call foo2 +; V8-NEXT: restore %g0, 12, %o1 +; +; V9-LABEL: extra_arg_standard: +; V9: ! %bb.0: ! %entry +; V9-NEXT: save %sp, -128, %sp +; V9-NEXT: call foo2 +; V9-NEXT: restore %g0, 12, %o1 entry: %call = tail call i32 @foo2(i32 %i, i32 12) ret i32 %call @@ -49,17 +75,31 @@ ; Perform tail call optimization for external symbol. define void @caller_extern(i8* %src) optsize #0 { -; CHECK-LABEL: caller_extern: -; CHECK: ! %bb.0: ! 
%entry -; CHECK-NEXT: sethi %hi(dest), %o1 -; CHECK-NEXT: add %o1, %lo(dest), %o1 -; CHECK-NEXT: mov 7, %o2 -; CHECK-NEXT: mov %o0, %o3 -; CHECK-NEXT: mov %o1, %o0 -; CHECK-NEXT: mov %o3, %o1 -; CHECK-NEXT: mov %o7, %g1 -; CHECK-NEXT: call memcpy -; CHECK-NEXT: mov %g1, %o7 +; V8-LABEL: caller_extern: +; V8: ! %bb.0: ! %entry +; V8-NEXT: sethi %hi(dest), %o1 +; V8-NEXT: add %o1, %lo(dest), %o1 +; V8-NEXT: mov 7, %o2 +; V8-NEXT: mov %o0, %o3 +; V8-NEXT: mov %o1, %o0 +; V8-NEXT: mov %o3, %o1 +; V8-NEXT: mov %o7, %g1 +; V8-NEXT: call memcpy +; V8-NEXT: mov %g1, %o7 +; +; V9-LABEL: caller_extern: +; V9: ! %bb.0: ! %entry +; V9-NEXT: sethi %h44(dest), %o1 +; V9-NEXT: add %o1, %m44(dest), %o1 +; V9-NEXT: sllx %o1, 12, %o1 +; V9-NEXT: add %o1, %l44(dest), %o1 +; V9-NEXT: mov 7, %o2 +; V9-NEXT: mov %o0, %o3 +; V9-NEXT: mov %o1, %o0 +; V9-NEXT: mov %o3, %o1 +; V9-NEXT: mov %o7, %g1 +; V9-NEXT: call memcpy +; V9-NEXT: mov %g1, %o7 entry: tail call void @llvm.memcpy.p0i8.p0i8.i32( i8* getelementptr inbounds ([2 x i8], @@ -71,24 +111,38 @@ ; Perform tail call optimization for function pointer. define i32 @func_ptr_test(i32 ()* nocapture %func_ptr) #0 { -; CHECK-LABEL: func_ptr_test: -; CHECK: ! %bb.0: ! %entry -; CHECK-NEXT: jmp %o0 -; CHECK-NEXT: nop +; V8-LABEL: func_ptr_test: +; V8: ! %bb.0: ! %entry +; V8-NEXT: jmp %o0 +; V8-NEXT: nop +; +; V9-LABEL: func_ptr_test: +; V9: ! %bb.0: ! %entry +; V9-NEXT: jmp %o0 +; V9-NEXT: nop entry: %call = tail call i32 %func_ptr() #1 ret i32 %call } define i32 @func_ptr_test2(i32 (i32, i32, i32)* nocapture %func_ptr, -; CHECK-LABEL: func_ptr_test2: -; CHECK: ! %bb.0: ! %entry -; CHECK-NEXT: save %sp, -96, %sp -; CHECK-NEXT: mov 10, %i3 -; CHECK-NEXT: mov %i0, %i4 -; CHECK-NEXT: mov %i1, %i0 -; CHECK-NEXT: jmp %i4 -; CHECK-NEXT: restore %g0, %i3, %o1 +; V8-LABEL: func_ptr_test2: +; V8: ! %bb.0: ! 
%entry +; V8-NEXT: save %sp, -96, %sp +; V8-NEXT: mov 10, %i3 +; V8-NEXT: mov %i0, %i4 +; V8-NEXT: mov %i1, %i0 +; V8-NEXT: jmp %i4 +; V8-NEXT: restore %g0, %i3, %o1 +; +; V9-LABEL: func_ptr_test2: +; V9: ! %bb.0: ! %entry +; V9-NEXT: save %sp, -128, %sp +; V9-NEXT: mov 10, %i3 +; V9-NEXT: mov %i0, %i4 +; V9-NEXT: mov %i1, %i0 +; V9-NEXT: jmp %i4 +; V9-NEXT: restore %g0, %i3, %o1 i32 %r, i32 %q) #1 { entry: %call = tail call i32 %func_ptr(i32 %r, i32 10, i32 %q) #1 @@ -99,20 +153,35 @@ ; Do not tail call optimize if stack is used to pass parameters. define i32 @caller_args() #0 { -; CHECK-LABEL: caller_args: -; CHECK: ! %bb.0: ! %entry -; CHECK-NEXT: save %sp, -104, %sp -; CHECK-NEXT: mov 6, %i0 -; CHECK-NEXT: mov %g0, %o0 -; CHECK-NEXT: mov 1, %o1 -; CHECK-NEXT: mov 2, %o2 -; CHECK-NEXT: mov 3, %o3 -; CHECK-NEXT: mov 4, %o4 -; CHECK-NEXT: mov 5, %o5 -; CHECK-NEXT: call foo7 -; CHECK-NEXT: st %i0, [%sp+92] -; CHECK-NEXT: ret -; CHECK-NEXT: restore %g0, %o0, %o0 +; V8-LABEL: caller_args: +; V8: ! %bb.0: ! %entry +; V8-NEXT: save %sp, -104, %sp +; V8-NEXT: mov 6, %i0 +; V8-NEXT: mov %g0, %o0 +; V8-NEXT: mov 1, %o1 +; V8-NEXT: mov 2, %o2 +; V8-NEXT: mov 3, %o3 +; V8-NEXT: mov 4, %o4 +; V8-NEXT: mov 5, %o5 +; V8-NEXT: call foo7 +; V8-NEXT: st %i0, [%sp+92] +; V8-NEXT: ret +; V8-NEXT: restore %g0, %o0, %o0 +; +; V9-LABEL: caller_args: +; V9: ! %bb.0: ! %entry +; V9-NEXT: save %sp, -192, %sp +; V9-NEXT: mov 6, %i0 +; V9-NEXT: mov 0, %o0 +; V9-NEXT: mov 1, %o1 +; V9-NEXT: mov 2, %o2 +; V9-NEXT: mov 3, %o3 +; V9-NEXT: mov 4, %o4 +; V9-NEXT: mov 5, %o5 +; V9-NEXT: call foo7 +; V9-NEXT: stx %i0, [%sp+2223] +; V9-NEXT: ret +; V9-NEXT: restore %g0, %o0, %o0 entry: %r = tail call i32 @foo7(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6) ret i32 %r @@ -123,15 +192,23 @@ ; byval parameters. define i32 @caller_byval() #0 { -; CHECK-LABEL: caller_byval: -; CHECK: ! %bb.0: ! 
%entry -; CHECK-NEXT: save %sp, -104, %sp -; CHECK-NEXT: ld [%fp+-4], %i0 -; CHECK-NEXT: st %i0, [%fp+-8] -; CHECK-NEXT: call callee_byval -; CHECK-NEXT: add %fp, -8, %o0 -; CHECK-NEXT: ret -; CHECK-NEXT: restore %g0, %o0, %o0 +; V8-LABEL: caller_byval: +; V8: ! %bb.0: ! %entry +; V8-NEXT: save %sp, -104, %sp +; V8-NEXT: ld [%fp+-4], %i0 +; V8-NEXT: st %i0, [%fp+-8] +; V8-NEXT: call callee_byval +; V8-NEXT: add %fp, -8, %o0 +; V8-NEXT: ret +; V8-NEXT: restore %g0, %o0, %o0 +; +; V9-LABEL: caller_byval: +; V9: ! %bb.0: ! %entry +; V9-NEXT: save %sp, -192, %sp +; V9-NEXT: call callee_byval +; V9-NEXT: add %fp, 2039, %o0 +; V9-NEXT: ret +; V9-NEXT: restore %g0, %o0, %o0 entry: %a = alloca i32* %r = tail call i32 @callee_byval(i32** byval(i32*) %a) @@ -141,11 +218,17 @@ ; Perform tail call optimization for sret function. define void @sret_test(%struct.a* noalias sret(%struct.a) %agg.result) #0 { -; CHECK-LABEL: sret_test: -; CHECK: ! %bb.0: ! %entry -; CHECK-NEXT: mov %o7, %g1 -; CHECK-NEXT: call sret_func -; CHECK-NEXT: mov %g1, %o7 +; V8-LABEL: sret_test: +; V8: ! %bb.0: ! %entry +; V8-NEXT: mov %o7, %g1 +; V8-NEXT: call sret_func +; V8-NEXT: mov %g1, %o7 +; +; V9-LABEL: sret_test: +; V9: ! %bb.0: ! %entry +; V9-NEXT: mov %o7, %g1 +; V9-NEXT: call sret_func +; V9-NEXT: mov %g1, %o7 entry: tail call void bitcast (void (%struct.a*)* @sret_func to void (%struct.a*)*)(%struct.a* sret(%struct.a) %agg.result) @@ -157,17 +240,30 @@ ; struct will generate a memcpy as the tail function. define void @ret_large_struct(%struct.big* noalias sret(%struct.big) %agg.result) #0 { -; CHECK-LABEL: ret_large_struct: -; CHECK: ! %bb.0: ! %entry -; CHECK-NEXT: save %sp, -96, %sp -; CHECK-NEXT: ld [%fp+64], %i0 -; CHECK-NEXT: sethi %hi(bigstruct), %i1 -; CHECK-NEXT: add %i1, %lo(bigstruct), %o1 -; CHECK-NEXT: mov 400, %o2 -; CHECK-NEXT: call memcpy -; CHECK-NEXT: mov %i0, %o0 -; CHECK-NEXT: jmp %i7+12 -; CHECK-NEXT: restore +; V8-LABEL: ret_large_struct: +; V8: ! %bb.0: ! 
%entry +; V8-NEXT: save %sp, -96, %sp +; V8-NEXT: ld [%fp+64], %i0 +; V8-NEXT: sethi %hi(bigstruct), %i1 +; V8-NEXT: add %i1, %lo(bigstruct), %o1 +; V8-NEXT: mov 400, %o2 +; V8-NEXT: call memcpy +; V8-NEXT: mov %i0, %o0 +; V8-NEXT: jmp %i7+12 +; V8-NEXT: restore +; +; V9-LABEL: ret_large_struct: +; V9: ! %bb.0: ! %entry +; V9-NEXT: save %sp, -176, %sp +; V9-NEXT: sethi %h44(bigstruct), %i1 +; V9-NEXT: add %i1, %m44(bigstruct), %i1 +; V9-NEXT: sllx %i1, 12, %i1 +; V9-NEXT: add %i1, %l44(bigstruct), %o1 +; V9-NEXT: mov 400, %o2 +; V9-NEXT: call memcpy +; V9-NEXT: mov %i0, %o0 +; V9-NEXT: ret +; V9-NEXT: restore entry: %0 = bitcast %struct.big* %agg.result to i8* tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %0, i8* align 4 bitcast (%struct.big* @bigstruct to i8*), i32 400, i1 false) @@ -177,10 +273,17 @@ ; Test register + immediate pattern. define void @addri_test(i32 %ptr) #0 { -; CHECK-LABEL: addri_test: -; CHECK: ! %bb.0: ! %entry -; CHECK-NEXT: jmp %o0+4 -; CHECK-NEXT: nop +; V8-LABEL: addri_test: +; V8: ! %bb.0: ! %entry +; V8-NEXT: jmp %o0+4 +; V8-NEXT: nop +; +; V9-LABEL: addri_test: +; V9: ! %bb.0: ! %entry +; V9-NEXT: add %o0, 4, %o0 +; V9-NEXT: srl %o0, 0, %o0 +; V9-NEXT: jmp %o0 +; V9-NEXT: nop entry: %add = add nsw i32 %ptr, 4 %0 = inttoptr i32 %add to void ()*