diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -1857,6 +1857,9 @@
     else if (JumpTarget.isSymbol())
       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
         addExternalSymbol(JumpTarget.getSymbolName());
+    else if (JumpTarget.isMCSymbol())
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
+        addSym(JumpTarget.getMCSymbol());
     else
-      llvm_unreachable("Expecting Global or External Symbol");
+      llvm_unreachable("Expecting Global, External or MC Symbol");
   } else if (RetOpcode == PPC::TCRETURNri) {
@@ -1876,6 +1879,9 @@
     else if (JumpTarget.isSymbol())
       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
         addExternalSymbol(JumpTarget.getSymbolName());
+    else if (JumpTarget.isMCSymbol())
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
+        addSym(JumpTarget.getMCSymbol());
     else
-      llvm_unreachable("Expecting Global or External Symbol");
+      llvm_unreachable("Expecting Global, External or MC Symbol");
   } else if (RetOpcode == PPC::TCRETURNri8) {
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1120,6 +1120,11 @@
         bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs,
         const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
 
+    bool IsEligibleForTailCallOptimization_AIX(
+        SDValue Callee, CallingConv::ID CalleeCC, const CallBase *CB,
+        bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs,
+        const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
+
     SDValue EmitTailCallLoadFPAndRetAddr(SelectionDAG &DAG, int SPDiff,
                                          SDValue Chain, SDValue &LROpOut,
                                          SDValue &FPOpOut,
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -5343,11 +5343,12 @@
              cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
             Callee.getOpcode() == ISD::TargetExternalSymbol ||
             Callee.getOpcode() == ISD::TargetGlobalAddress ||
+            (Subtarget.isAIXABI() &&
Callee.getOpcode() == ISD::MCSymbol) ||
             isa<ConstantSDNode>(Callee) ||
             (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
-           "Expecting a global address, external symbol, absolute value, "
-           "register or an indirect tail call when PC Relative calls are "
-           "used.");
+           "Expecting a global address, external symbol, mcsymbol, absolute "
+           "value, register or an indirect tail call when PC Relative calls "
+           "are used.");
     // PC Relative calls also use TC_RETURN as the way to mark tail calls.
     assert(CallOpc == PPCISD::TC_RETURN &&
            "Unexpected call opcode for a tail call.");
@@ -5399,6 +5400,9 @@
     else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
       isTailCall = IsEligibleForTailCallOptimization_64SVR4(
           Callee, CallConv, CB, isVarArg, Outs, Ins, DAG);
+    else if (Subtarget.isAIXABI())
+      isTailCall = IsEligibleForTailCallOptimization_AIX(
+          Callee, CallConv, CB, isVarArg, Outs, Ins, DAG);
     else
       isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
                                                      Ins, DAG);
@@ -6929,6 +6933,54 @@
   return Chain;
 }
 
+/// Return true if the call can be lowered as a sibling call on AIX. Rejected:
+/// guaranteed-TCO mode, DisableSCO, variadic calls, byval arguments, calling
+/// conventions other than matching C/Fast, arguments that need a stack slot
+/// or custom assignment, and callees that are not direct function/external
+/// symbol references sharing the caller's TOC base.
+bool PPCTargetLowering::IsEligibleForTailCallOptimization_AIX(
+    SDValue Callee, CallingConv::ID CalleeCC, const CallBase *CB, bool isVarArg,
+    const SmallVectorImpl<ISD::OutputArg> &Outs,
+    const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
+  assert(!Subtarget.isUsingPCRelativeCalls() &&
+         "PCRelative calls not supported on AIX");
+  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
+  // TODO: Support non-sibling tail call.
+  if (TailCallOpt)
+    return false;
+  if (DisableSCO)
+    return false;
+  // Variadic argument function is unsupported.
+  if (isVarArg)
+    return false;
+  // Argument passed byval is unsupported.
+  if (any_of(Outs, [](const ISD::OutputArg &OA) { return OA.Flags.isByVal(); }))
+    return false;
+  if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
+    return false;
+  MachineFunction &MF = DAG.getMachineFunction();
+  Function &Caller = MF.getFunction();
+  CallingConv::ID CallerCC = Caller.getCallingConv();
+  // Check if calling convention is compatible.
+  if ((CalleeCC != CallingConv::C && CalleeCC != CallingConv::Fast) ||
+      (CalleeCC != CallerCC))
+    return false;
+  // Check if any stack slot is required to pass parameter.
+  // FIXME: CCInfo.AnalyzeCallOperands will be called in LowerCall_AIX again.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  AIXCCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, *DAG.getContext());
+  CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
+  if (any_of(ArgLocs, [](const CCValAssign &VA) {
+        return VA.needsCustom() || VA.isMemLoc();
+      }))
+    return false;
+  // It's ineligible to perform tailcallopt if recovery of TOC pointer is
+  // required after the call.
+  if (!isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee))
+    return false;
+  return callsShareTOCBase(&Caller, Callee, getTargetMachine());
+}
+
 SDValue PPCTargetLowering::LowerCall_AIX(
     SDValue Chain, SDValue Callee, CallFlags CFlags,
     const SmallVectorImpl<ISD::OutputArg> &Outs,
@@ -6946,6 +6998,7 @@
   if (CFlags.IsPatchPoint)
     report_fatal_error("This call type is unimplemented on AIX.");
 
+  bool IsTailCall = CFlags.IsTailCall;
   const PPCSubtarget& Subtarget =
       static_cast<const PPCSubtarget&>(DAG.getSubtarget());
 
@@ -6976,10 +7029,16 @@
   const unsigned NumBytes = std::max(LinkageSize + MinParameterSaveAreaSize,
                                      CCInfo.getNextStackOffset());
 
+  // TODO: To support tailcall, CalculateTailCallSPDiff should be called.
+  int SPDiff = 0;
+  if (IsTailCall)
+    Chain = DAG.getStackArgumentTokenFactor(Chain);
   // Adjust the stack pointer for the new arguments...
   // These operations are automatically eliminated by the prolog/epilog pass.
   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
   SDValue CallSeqStart = Chain;
+  SDValue LROp, FPOp;
+  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
 
   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
   SmallVector<SDValue, 8> MemOpChains;
@@ -6989,7 +7048,7 @@
   // passing.
   const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
                                    : DAG.getRegister(PPC::R1, MVT::i32);
-
+  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
   for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
     const unsigned ValNo = ArgLocs[I].getValNo();
     SDValue Arg = OutVals[ValNo];
@@ -7246,7 +7305,9 @@
     InFlag = Chain.getValue(1);
   }
 
-  const int SPDiff = 0;
+  if (IsTailCall)
+    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
+                    TailCallArguments);
   return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
 }
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -334,6 +334,9 @@
 def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm),
           (TCRETURNdi8 texternalsym:$dst, imm:$imm)>;
 
+def : Pat<(PPCtc_return (i64 mcsym:$dst), imm:$imm),
+          (TCRETURNdi8 mcsym:$dst, imm:$imm)>;
+
 def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
           (TCRETURNri8 CTRRC8:$dst, imm:$imm)>;
 
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -3388,6 +3388,9 @@
 def : Pat<(PPCtc_return (i32 texternalsym:$dst), imm:$imm),
           (TCRETURNdi texternalsym:$dst, imm:$imm)>;
 
+def : Pat<(PPCtc_return (i32 mcsym:$dst), imm:$imm),
+          (TCRETURNdi mcsym:$dst, imm:$imm)>;
+
 def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm),
           (TCRETURNri CTRRC:$dst, imm:$imm)>;
 
diff --git a/llvm/test/CodeGen/PowerPC/tailcall-aix.ll b/llvm/test/CodeGen/PowerPC/tailcall-aix.ll
new file mode 100644
--- /dev/null
+++ 
b/llvm/test/CodeGen/PowerPC/tailcall-aix.ll
@@ -0,0 +1,413 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-aix-xcoff \
+; RUN:   -ppc-asm-full-reg-names < %s | FileCheck --check-prefix=CHECK-AIX-64 %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-aix-xcoff \
+; RUN:   -ppc-asm-full-reg-names < %s | FileCheck --check-prefix=CHECK-AIX-32 %s
+
+define dso_local i32 @f(i32) {
+; CHECK-AIX-64-LABEL: f:
+; CHECK-AIX-64:       # %bb.0:
+; CHECK-AIX-64-NEXT:    li 3, 42
+; CHECK-AIX-64-NEXT:    blr
+;
+; CHECK-AIX-32-LABEL: f:
+; CHECK-AIX-32:       # %bb.0:
+; CHECK-AIX-32-NEXT:    li 3, 42
+; CHECK-AIX-32-NEXT:    blr
+  ret i32 42
+}
+
+define i32 @g(i32) {
+; CHECK-AIX-64-LABEL: g:
+; CHECK-AIX-64:       # %bb.0:
+; CHECK-AIX-64-NEXT:    li 3, 42
+; CHECK-AIX-64-NEXT:    blr
+;
+; CHECK-AIX-32-LABEL: g:
+; CHECK-AIX-32:       # %bb.0:
+; CHECK-AIX-32-NEXT:    li 3, 42
+; CHECK-AIX-32-NEXT:    blr
+  ret i32 42
+}
+
+define i32 @caller(i1 %flag) {
+; CHECK-AIX-64-LABEL: caller:
+; CHECK-AIX-64:       # %bb.0: # %entry
+; CHECK-AIX-64-NEXT:    mflr 0
+; CHECK-AIX-64-NEXT:    std 0, 16(1)
+; CHECK-AIX-64-NEXT:    stdu 1, -112(1)
+; CHECK-AIX-64-NEXT:    andi. 3, 3, 1
+; CHECK-AIX-64-NEXT:    bc 4, gt, L..BB2_2
+; CHECK-AIX-64-NEXT:  # %bb.1: # %true
+; CHECK-AIX-64-NEXT:    li 3, 1
+; CHECK-AIX-64-NEXT:    addi 1, 1, 112
+; CHECK-AIX-64-NEXT:    ld 0, 16(1)
+; CHECK-AIX-64-NEXT:    mtlr 0
+; CHECK-AIX-64-NEXT:    b .f
+; CHECK-AIX-64-NEXT:    #TC_RETURNd8 .f 0
+; CHECK-AIX-64-NEXT:  L..BB2_2: # %false
+; CHECK-AIX-64-NEXT:    li 3, 0
+; CHECK-AIX-64-NEXT:    bl .g
+; CHECK-AIX-64-NEXT:    nop
+; CHECK-AIX-64-NEXT:    addi 1, 1, 112
+; CHECK-AIX-64-NEXT:    ld 0, 16(1)
+; CHECK-AIX-64-NEXT:    mtlr 0
+; CHECK-AIX-64-NEXT:    blr
+;
+; CHECK-AIX-32-LABEL: caller:
+; CHECK-AIX-32:       # %bb.0: # %entry
+; CHECK-AIX-32-NEXT:    mflr 0
+; CHECK-AIX-32-NEXT:    stw 0, 8(1)
+; CHECK-AIX-32-NEXT:    stwu 1, -64(1)
+; CHECK-AIX-32-NEXT:    andi. 3, 3, 1
+; CHECK-AIX-32-NEXT:    bc 4, gt, L..BB2_2
+; CHECK-AIX-32-NEXT:  # %bb.1: # %true
+; CHECK-AIX-32-NEXT:    li 3, 1
+; CHECK-AIX-32-NEXT:    addi 1, 1, 64
+; CHECK-AIX-32-NEXT:    lwz 0, 8(1)
+; CHECK-AIX-32-NEXT:    mtlr 0
+; CHECK-AIX-32-NEXT:    b .f
+; CHECK-AIX-32-NEXT:    #TC_RETURNd .f 0
+; CHECK-AIX-32-NEXT:  L..BB2_2: # %false
+; CHECK-AIX-32-NEXT:    li 3, 0
+; CHECK-AIX-32-NEXT:    bl .g
+; CHECK-AIX-32-NEXT:    nop
+; CHECK-AIX-32-NEXT:    addi 1, 1, 64
+; CHECK-AIX-32-NEXT:    lwz 0, 8(1)
+; CHECK-AIX-32-NEXT:    mtlr 0
+; CHECK-AIX-32-NEXT:    blr
+entry:
+  br i1 %flag, label %true, label %false
+true:
+  %a = tail call i32 @f(i32 1)
+  ret i32 %a
+false:
+  %b = tail call i32 @g(i32 0)
+  ret i32 %b
+}
+
+define dso_local fastcc i32 @ff(i32) {
+; CHECK-AIX-64-LABEL: ff:
+; CHECK-AIX-64:       # %bb.0:
+; CHECK-AIX-64-NEXT:    li 3, 42
+; CHECK-AIX-64-NEXT:    blr
+;
+; CHECK-AIX-32-LABEL: ff:
+; CHECK-AIX-32:       # %bb.0:
+; CHECK-AIX-32-NEXT:    li 3, 42
+; CHECK-AIX-32-NEXT:    blr
+  ret i32 42
+}
+
+define i32 @caller1(i32 %x) {
+; CHECK-AIX-64-LABEL: caller1:
+; CHECK-AIX-64:       # %bb.0:
+; CHECK-AIX-64-NEXT:    stdu 1, -112(1)
+; CHECK-AIX-64-NEXT:    clrldi 3, 3, 32
+; CHECK-AIX-64-NEXT:    addi 1, 1, 112
+; CHECK-AIX-64-NEXT:    b .ff
+; CHECK-AIX-64-NEXT:    #TC_RETURNd8 .ff 0
+;
+; CHECK-AIX-32-LABEL: caller1:
+; CHECK-AIX-32:       # %bb.0:
+; CHECK-AIX-32-NEXT:    stwu 1, -64(1)
+; CHECK-AIX-32-NEXT:    addi 1, 1, 64
+; CHECK-AIX-32-NEXT:    b .ff
+; CHECK-AIX-32-NEXT:    #TC_RETURNd .ff 0
+  %r = tail call i32 @ff(i32 %x)
+  ret i32 %r
+}
+
+define dso_local i32 @byval_callee(i32* byval(i32) %x) {
+; CHECK-AIX-64-LABEL: byval_callee:
+; CHECK-AIX-64:       # %bb.0:
+; CHECK-AIX-64-NEXT:    std 3, 48(1)
+; CHECK-AIX-64-NEXT:    lwz 3, 48(1)
+; CHECK-AIX-64-NEXT:    blr
+;
+; CHECK-AIX-32-LABEL: byval_callee:
+; CHECK-AIX-32:       # %bb.0:
+; CHECK-AIX-32-NEXT:    stw 3, 24(1)
+; CHECK-AIX-32-NEXT:    blr
+  %y = load i32, i32* %x
+  ret i32 %y
+}
+
+define i32 @caller2(i32* %x) {
+; CHECK-AIX-64-LABEL: caller2:
+; CHECK-AIX-64:       # %bb.0:
+; CHECK-AIX-64-NEXT:    mflr 0
+; CHECK-AIX-64-NEXT:    std 0, 16(1)
+; CHECK-AIX-64-NEXT:    stdu 1, -112(1)
+; CHECK-AIX-64-NEXT:    lwz 3, 0(3)
+; CHECK-AIX-64-NEXT:    sldi 3, 3, 32
+; CHECK-AIX-64-NEXT:    bl .byval_callee
+; CHECK-AIX-64-NEXT:    addi 1, 1, 112
+; CHECK-AIX-64-NEXT:    ld 0, 16(1)
+; CHECK-AIX-64-NEXT:    mtlr 0
+; CHECK-AIX-64-NEXT:    blr
+;
+; CHECK-AIX-32-LABEL: caller2:
+; CHECK-AIX-32:       # %bb.0:
+; CHECK-AIX-32-NEXT:    mflr 0
+; CHECK-AIX-32-NEXT:    stw 0, 8(1)
+; CHECK-AIX-32-NEXT:    stwu 1, -64(1)
+; CHECK-AIX-32-NEXT:    lwz 3, 0(3)
+; CHECK-AIX-32-NEXT:    bl .byval_callee
+; CHECK-AIX-32-NEXT:    addi 1, 1, 64
+; CHECK-AIX-32-NEXT:    lwz 0, 8(1)
+; CHECK-AIX-32-NEXT:    mtlr 0
+; CHECK-AIX-32-NEXT:    blr
+  %r = tail call i32 @byval_callee(i32* byval(i32) %x)
+  ret i32 %r
+}
+
+define dso_local i32 @vararg_callee(i8* %prefix, ...) {
+; CHECK-AIX-64-LABEL: vararg_callee:
+; CHECK-AIX-64:       # %bb.0:
+; CHECK-AIX-64-NEXT:    li 3, 42
+; CHECK-AIX-64-NEXT:    std 4, 56(1)
+; CHECK-AIX-64-NEXT:    std 5, 64(1)
+; CHECK-AIX-64-NEXT:    std 6, 72(1)
+; CHECK-AIX-64-NEXT:    std 7, 80(1)
+; CHECK-AIX-64-NEXT:    std 8, 88(1)
+; CHECK-AIX-64-NEXT:    std 9, 96(1)
+; CHECK-AIX-64-NEXT:    std 10, 104(1)
+; CHECK-AIX-64-NEXT:    blr
+;
+; CHECK-AIX-32-LABEL: vararg_callee:
+; CHECK-AIX-32:       # %bb.0:
+; CHECK-AIX-32-NEXT:    li 3, 42
+; CHECK-AIX-32-NEXT:    stw 4, 28(1)
+; CHECK-AIX-32-NEXT:    stw 5, 32(1)
+; CHECK-AIX-32-NEXT:    stw 6, 36(1)
+; CHECK-AIX-32-NEXT:    stw 7, 40(1)
+; CHECK-AIX-32-NEXT:    stw 8, 44(1)
+; CHECK-AIX-32-NEXT:    stw 9, 48(1)
+; CHECK-AIX-32-NEXT:    stw 10, 52(1)
+; CHECK-AIX-32-NEXT:    blr
+  ret i32 42
+}
+
+define i32 @caller3(i8* %prefix) {
+; CHECK-AIX-64-LABEL: caller3:
+; CHECK-AIX-64:       # %bb.0:
+; CHECK-AIX-64-NEXT:    mflr 0
+; CHECK-AIX-64-NEXT:    std 0, 16(1)
+; CHECK-AIX-64-NEXT:    stdu 1, -112(1)
+; CHECK-AIX-64-NEXT:    li 4, 0
+; CHECK-AIX-64-NEXT:    li 5, 1
+; CHECK-AIX-64-NEXT:    bl .vararg_callee
+; CHECK-AIX-64-NEXT:    addi 1, 1, 112
+; CHECK-AIX-64-NEXT:    ld 0, 16(1)
+; CHECK-AIX-64-NEXT:    mtlr 0
+; CHECK-AIX-64-NEXT:    blr
+;
+; CHECK-AIX-32-LABEL: caller3:
+; CHECK-AIX-32:       # %bb.0:
+; CHECK-AIX-32-NEXT:    mflr 0
+; CHECK-AIX-32-NEXT:    stw 0, 8(1)
+; CHECK-AIX-32-NEXT:    stwu 1, -64(1)
+; CHECK-AIX-32-NEXT:    li 4, 0
+; CHECK-AIX-32-NEXT:    li 5, 1
+; CHECK-AIX-32-NEXT:    bl .vararg_callee
+; CHECK-AIX-32-NEXT:    addi 1, 1, 64
+; CHECK-AIX-32-NEXT:    lwz 0, 8(1)
+; CHECK-AIX-32-NEXT:    mtlr 0
+; CHECK-AIX-32-NEXT:    blr
+  %r = tail call i32(i8*, ...) @vararg_callee(i8* %prefix, i32 0, i32 1)
+  ret i32 %r
+}
+
+define dso_local i32 @needstackslot_callee(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %p7, i32 %p8) {
+; CHECK-AIX-64-LABEL: needstackslot_callee:
+; CHECK-AIX-64:       # %bb.0:
+; CHECK-AIX-64-NEXT:    li 3, 42
+; CHECK-AIX-64-NEXT:    blr
+;
+; CHECK-AIX-32-LABEL: needstackslot_callee:
+; CHECK-AIX-32:       # %bb.0:
+; CHECK-AIX-32-NEXT:    li 3, 42
+; CHECK-AIX-32-NEXT:    blr
+  ret i32 42
+}
+
+define dso_local i32 @not_needstackslot_callee(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6, i32 %p7) {
+; CHECK-AIX-64-LABEL: not_needstackslot_callee:
+; CHECK-AIX-64:       # %bb.0:
+; CHECK-AIX-64-NEXT:    li 3, 42
+; CHECK-AIX-64-NEXT:    blr
+;
+; CHECK-AIX-32-LABEL: not_needstackslot_callee:
+; CHECK-AIX-32:       # %bb.0:
+; CHECK-AIX-32-NEXT:    li 3, 42
+; CHECK-AIX-32-NEXT:    blr
+  ret i32 42
+}
+
+define i32 @caller4(i1 %flag) {
+; CHECK-AIX-64-LABEL: caller4:
+; CHECK-AIX-64:       # %bb.0: # %entry
+; CHECK-AIX-64-NEXT:    mflr 0
+; CHECK-AIX-64-NEXT:    std 0, 16(1)
+; CHECK-AIX-64-NEXT:    stdu 1, -128(1)
+; CHECK-AIX-64-NEXT:    andi. 3, 3, 1
+; CHECK-AIX-64-NEXT:    bc 4, gt, L..BB11_2
+; CHECK-AIX-64-NEXT:  # %bb.1: # %true
+; CHECK-AIX-64-NEXT:    li 11, 8
+; CHECK-AIX-64-NEXT:    li 3, 0
+; CHECK-AIX-64-NEXT:    li 4, 1
+; CHECK-AIX-64-NEXT:    li 5, 2
+; CHECK-AIX-64-NEXT:    li 6, 3
+; CHECK-AIX-64-NEXT:    li 7, 4
+; CHECK-AIX-64-NEXT:    li 8, 5
+; CHECK-AIX-64-NEXT:    li 9, 6
+; CHECK-AIX-64-NEXT:    li 10, 7
+; CHECK-AIX-64-NEXT:    std 11, 112(1)
+; CHECK-AIX-64-NEXT:    bl .needstackslot_callee
+; CHECK-AIX-64-NEXT:    addi 1, 1, 128
+; CHECK-AIX-64-NEXT:    ld 0, 16(1)
+; CHECK-AIX-64-NEXT:    mtlr 0
+; CHECK-AIX-64-NEXT:    blr
+; CHECK-AIX-64-NEXT:  L..BB11_2: # %false
+; CHECK-AIX-64-NEXT:    li 3, 0
+; CHECK-AIX-64-NEXT:    li 4, 1
+; CHECK-AIX-64-NEXT:    li 5, 2
+; CHECK-AIX-64-NEXT:    li 6, 3
+; CHECK-AIX-64-NEXT:    li 7, 4
+; CHECK-AIX-64-NEXT:    li 8, 5
+; CHECK-AIX-64-NEXT:    li 9, 6
+; CHECK-AIX-64-NEXT:    li 10, 7
+; CHECK-AIX-64-NEXT:    addi 1, 1, 128
+; CHECK-AIX-64-NEXT:    ld 0, 16(1)
+; CHECK-AIX-64-NEXT:    mtlr 0
+; CHECK-AIX-64-NEXT:    b .not_needstackslot_callee
+; CHECK-AIX-64-NEXT:    #TC_RETURNd8 .not_needstackslot_callee 0
+;
+; CHECK-AIX-32-LABEL: caller4:
+; CHECK-AIX-32:       # %bb.0: # %entry
+; CHECK-AIX-32-NEXT:    mflr 0
+; CHECK-AIX-32-NEXT:    stw 0, 8(1)
+; CHECK-AIX-32-NEXT:    stwu 1, -64(1)
+; CHECK-AIX-32-NEXT:    andi. 3, 3, 1
+; CHECK-AIX-32-NEXT:    bc 4, gt, L..BB11_2
+; CHECK-AIX-32-NEXT:  # %bb.1: # %true
+; CHECK-AIX-32-NEXT:    li 11, 8
+; CHECK-AIX-32-NEXT:    li 3, 0
+; CHECK-AIX-32-NEXT:    li 4, 1
+; CHECK-AIX-32-NEXT:    li 5, 2
+; CHECK-AIX-32-NEXT:    li 6, 3
+; CHECK-AIX-32-NEXT:    li 7, 4
+; CHECK-AIX-32-NEXT:    li 8, 5
+; CHECK-AIX-32-NEXT:    li 9, 6
+; CHECK-AIX-32-NEXT:    li 10, 7
+; CHECK-AIX-32-NEXT:    stw 11, 56(1)
+; CHECK-AIX-32-NEXT:    bl .needstackslot_callee
+; CHECK-AIX-32-NEXT:    addi 1, 1, 64
+; CHECK-AIX-32-NEXT:    lwz 0, 8(1)
+; CHECK-AIX-32-NEXT:    mtlr 0
+; CHECK-AIX-32-NEXT:    blr
+; CHECK-AIX-32-NEXT:  L..BB11_2: # %false
+; CHECK-AIX-32-NEXT:    li 3, 0
+; CHECK-AIX-32-NEXT:    li 4, 1
+; CHECK-AIX-32-NEXT:    li 5, 2
+; CHECK-AIX-32-NEXT:    li 6, 3
+; CHECK-AIX-32-NEXT:    li 7, 4
+; CHECK-AIX-32-NEXT:    li 8, 5
+; CHECK-AIX-32-NEXT:    li 9, 6
+; CHECK-AIX-32-NEXT:    li 10, 7
+; CHECK-AIX-32-NEXT:    addi 1, 1, 64
+; CHECK-AIX-32-NEXT:    lwz 0, 8(1)
+; CHECK-AIX-32-NEXT:    mtlr 0
+; CHECK-AIX-32-NEXT:    b .not_needstackslot_callee
+; CHECK-AIX-32-NEXT:    #TC_RETURNd .not_needstackslot_callee 0
+entry:
+  br i1 %flag, label %true, label %false
+true:
+  %r0 = tail call i32 @needstackslot_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8)
+  ret i32 %r0
+false:
+  %r1 = tail call i32 @not_needstackslot_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7)
+  ret i32 %r1
+}
+
+define dso_local float @float_needstackslot_callee(float %p0, float %p1, float %p2, float %p3, float %p4, float %p5, float %p6, float %p7, float %p8) {
+; CHECK-AIX-64-LABEL: float_needstackslot_callee:
+; CHECK-AIX-64:       # %bb.0:
+; CHECK-AIX-64-NEXT:    ld 3, L..C0(2)
+; CHECK-AIX-64-NEXT:    lfs 1, 0(3)
+; CHECK-AIX-64-NEXT:    blr
+;
+; CHECK-AIX-32-LABEL: float_needstackslot_callee:
+; CHECK-AIX-32:       # %bb.0:
+; CHECK-AIX-32-NEXT:    lwz 3, L..C0(2)
+; CHECK-AIX-32-NEXT:    lfs 1, 0(3)
+; CHECK-AIX-32-NEXT:    blr
+  ret float 42.0
+}
+
+define float @caller5() {
+; CHECK-AIX-64-LABEL: caller5:
+; CHECK-AIX-64:       # %bb.0:
+; CHECK-AIX-64-NEXT:    mflr 0
+; CHECK-AIX-64-NEXT:    std 0, 16(1)
+; CHECK-AIX-64-NEXT:    stdu 1, -128(1)
+; CHECK-AIX-64-NEXT:    ld 3, L..C1(2)
+; CHECK-AIX-64-NEXT:    ld 4, L..C2(2)
+; CHECK-AIX-64-NEXT:    lfs 1, 0(3)
+; CHECK-AIX-64-NEXT:    ld 3, L..C3(2)
+; CHECK-AIX-64-NEXT:    lfs 2, 0(4)
+; CHECK-AIX-64-NEXT:    ld 4, L..C4(2)
+; CHECK-AIX-64-NEXT:    lfs 3, 0(3)
+; CHECK-AIX-64-NEXT:    ld 3, L..C5(2)
+; CHECK-AIX-64-NEXT:    lfs 4, 0(4)
+; CHECK-AIX-64-NEXT:    ld 4, L..C6(2)
+; CHECK-AIX-64-NEXT:    lfs 5, 0(3)
+; CHECK-AIX-64-NEXT:    ld 3, L..C7(2)
+; CHECK-AIX-64-NEXT:    lfs 6, 0(4)
+; CHECK-AIX-64-NEXT:    ld 4, L..C8(2)
+; CHECK-AIX-64-NEXT:    lfs 7, 0(3)
+; CHECK-AIX-64-NEXT:    ld 3, L..C9(2)
+; CHECK-AIX-64-NEXT:    lfs 8, 0(4)
+; CHECK-AIX-64-NEXT:    lfs 9, 0(3)
+; CHECK-AIX-64-NEXT:    lis 3, 16640
+; CHECK-AIX-64-NEXT:    stw 3, 112(1)
+; CHECK-AIX-64-NEXT:    bl .float_needstackslot_callee
+; CHECK-AIX-64-NEXT:    addi 1, 1, 128
+; CHECK-AIX-64-NEXT:    ld 0, 16(1)
+; CHECK-AIX-64-NEXT:    mtlr 0
+; CHECK-AIX-64-NEXT:    blr
+;
+; CHECK-AIX-32-LABEL: caller5:
+; CHECK-AIX-32:       # %bb.0:
+; CHECK-AIX-32-NEXT:    mflr 0
+; CHECK-AIX-32-NEXT:    stw 0, 8(1)
+; CHECK-AIX-32-NEXT:    stwu 1, -64(1)
+; CHECK-AIX-32-NEXT:    lwz 3, L..C1(2)
+; CHECK-AIX-32-NEXT:    lwz 4, L..C2(2)
+; CHECK-AIX-32-NEXT:    lfs 1, 0(3)
+; CHECK-AIX-32-NEXT:    lwz 3, L..C3(2)
+; CHECK-AIX-32-NEXT:    lfs 2, 0(4)
+; CHECK-AIX-32-NEXT:    lwz 4, L..C4(2)
+; CHECK-AIX-32-NEXT:    lfs 3, 0(3)
+; CHECK-AIX-32-NEXT:    lwz 3, L..C5(2)
+; CHECK-AIX-32-NEXT:    lfs 4, 0(4)
+; CHECK-AIX-32-NEXT:    lwz 4, L..C6(2)
+; CHECK-AIX-32-NEXT:    lfs 5, 0(3)
+; CHECK-AIX-32-NEXT:    lwz 3, L..C7(2)
+; CHECK-AIX-32-NEXT:    lfs 6, 0(4)
+; CHECK-AIX-32-NEXT:    lwz 4, L..C8(2)
+; CHECK-AIX-32-NEXT:    lfs 7, 0(3)
+; CHECK-AIX-32-NEXT:    lwz 3, L..C9(2)
+; CHECK-AIX-32-NEXT:    lfs 8, 0(4)
+; CHECK-AIX-32-NEXT:    lfs 9, 0(3)
+; CHECK-AIX-32-NEXT:    lis 3, 16640
+; CHECK-AIX-32-NEXT:    stw 3, 56(1)
+; CHECK-AIX-32-NEXT:    bl .float_needstackslot_callee
+; CHECK-AIX-32-NEXT:    addi 1, 1, 64
+; CHECK-AIX-32-NEXT:    lwz 0, 8(1)
+; CHECK-AIX-32-NEXT:    mtlr 0
+; CHECK-AIX-32-NEXT:    blr
+  %r = tail call float @float_needstackslot_callee(float 0.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0)
+  ret float %r
+}