diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -1537,13 +1537,14 @@ // 3) A function does not use the TOC pointer R2 but does have calls. // In this case st_other=1 since we do not know whether or not any // of the callees clobber R2. This case is dealt with in this else if - // block. + // block. Tail calls are considered calls and the st_other should also + // be set to 1 in that case as well. // 4) The function does not use the TOC pointer but R2 is used inside // the function. In this case st_other=1 once again. // 5) This function uses inline asm. We mark R2 as reserved if the function - // has inline asm so we have to assume that it may be used. - if (MF->getFrameInfo().hasCalls() || MF->hasInlineAsm() || - (!PPCFI->usesTOCBasePtr() && UsesX2OrR2)) { + // has inline asm as we have to assume that it may be used. + if (MF->getFrameInfo().hasCalls() || MF->getFrameInfo().hasTailCall() || + MF->hasInlineAsm() || (!PPCFI->usesTOCBasePtr() && UsesX2OrR2)) { PPCTargetStreamer *TS = static_cast(OutStreamer->getTargetStreamer()); if (TS) diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -1674,13 +1674,25 @@ DebugLoc dl = MBBI->getDebugLoc(); const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); - // Create branch instruction for pseudo tail call return instruction + // Create branch instruction for pseudo tail call return instruction. + // The TCRETURNdi variants are direct calls. Valid targets for those are + // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel + // since we can tail call external functions with PC-Rel (i.e. we don't need + // to worry about different TOC pointers). 
Some of the external functions will + // be MO_GlobalAddress while others like memcpy for example, are going to + // be MO_ExternalSymbol. unsigned RetOpcode = MBBI->getOpcode(); if (RetOpcode == PPC::TCRETURNdi) { MBBI = MBB.getLastNonDebugInstr(); MachineOperand &JumpTarget = MBBI->getOperand(0); - BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). - addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); + if (JumpTarget.isGlobal()) + BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). + addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); + else if (JumpTarget.isSymbol()) + BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). + addExternalSymbol(JumpTarget.getSymbolName()); + else + llvm_unreachable("Expecting Global or External Symbol"); } else if (RetOpcode == PPC::TCRETURNri) { MBBI = MBB.getLastNonDebugInstr(); assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); @@ -1692,8 +1704,14 @@ } else if (RetOpcode == PPC::TCRETURNdi8) { MBBI = MBB.getLastNonDebugInstr(); MachineOperand &JumpTarget = MBBI->getOperand(0); - BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). - addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); + if (JumpTarget.isGlobal()) + BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). + addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); + else if (JumpTarget.isSymbol()) + BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). 
+ addExternalSymbol(JumpTarget.getSymbolName()); + else + llvm_unreachable("Expecting Global or External Symbol"); } else if (RetOpcode == PPC::TCRETURNri8) { MBBI = MBB.getLastNonDebugInstr(); assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -4780,16 +4780,6 @@ const SmallVectorImpl &Ins, SelectionDAG &DAG) const { bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt; - // FIXME: Tail calls are currently disabled when using PC Relative addressing. - // The issue is that PC Relative is only partially implemented and so there - // is currently a mix of functions that require the TOC and functions that do - // not require it. If we have A calls B calls C and both A and B require the - // TOC and C does not and is marked as clobbering R2 then it is not safe for - // B to tail call C. Since we do not have the information of whether or not - // a funciton needs to use the TOC here in this function we need to be - // conservatively safe and disable all tail calls for now. - if (Subtarget.isUsingPCRelativeCalls()) return false; - if (DisableSCO && !TailCallOpt) return false; // Variadic argument functions are not supported. @@ -4829,15 +4819,22 @@ needStackSlotPassParameters(Subtarget, Outs)) return false; - // No TCO/SCO on indirect call because Caller have to restore its TOC - if (!isFunctionGlobalAddress(Callee) && - !isa(Callee)) + // All variants of 64-bit ELF ABIs without PC-Relative addressing require that + // the caller and callee share the same TOC for TCO/SCO. If the caller and + // callee potentially have different TOC bases then we cannot tail call since + // we need to restore the TOC pointer after the call. 
+ // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977 + // We cannot guarantee this for indirect calls or calls to external functions. + // When PC-Relative addressing is used, the concept of the TOC is no longer + // applicable so this check is not required. + // Check first for indirect calls. + if (!Subtarget.isUsingPCRelativeCalls() && + !isFunctionGlobalAddress(Callee) && !isa(Callee)) return false; - // If the caller and callee potentially have different TOC bases then we - // cannot tail call since we need to restore the TOC pointer after the call. - // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977 - if (!callsShareTOCBase(&Caller, Callee, getTargetMachine())) + // Check if we share the TOC base. + if (!Subtarget.isUsingPCRelativeCalls() && + !callsShareTOCBase(&Caller, Callee, getTargetMachine())) return false; // TCO allows altering callee ABI, so we don't have to check further. @@ -4849,11 +4846,14 @@ // If callee use the same argument list that caller is using, then we can // apply SCO on this case. If it is not, then we need to check if callee needs // stack for passing arguments. - assert(CB && "Expected to have a CallBase!"); - if (!hasSameArgumentList(&Caller, *CB) && - needStackSlotPassParameters(Subtarget, Outs)) { + // PC Relative tail calls may not have a CallBase. + // If there is no CallBase we cannot verify if we have the same argument + // list so assume that we don't have the same argument list. + if (CB && !hasSameArgumentList(&Caller, *CB) && + needStackSlotPassParameters(Subtarget, Outs)) + return false; + else if (!CB && needStackSlotPassParameters(Subtarget, Outs)) return false; - } return true; } @@ -5534,13 +5534,18 @@ // Emit tail call. if (CFlags.IsTailCall) { + // Indirect tail calls when using PC Relative calls do not have the same + // constraints. 
assert(((Callee.getOpcode() == ISD::Register && cast(Callee)->getReg() == PPC::CTR) || Callee.getOpcode() == ISD::TargetExternalSymbol || Callee.getOpcode() == ISD::TargetGlobalAddress || - isa(Callee)) && - "Expecting a global address, external symbol, absolute value or " - "register"); + isa(Callee) || + (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) && + "Expecting a global address, external symbol, absolute value, " + "register or an indirect tail call when PC Relative calls are " + "used."); + // PC Relative calls also use TC_RETURN as the way to mark tail calls. assert(CallOpc == PPCISD::TC_RETURN && "Unexpected call opcode for a tail call."); DAG.getMachineFunction().getFrameInfo().setHasTailCall(); @@ -5598,17 +5603,19 @@ if (!getTargetMachine().Options.GuaranteedTailCallOpt) ++NumSiblingCalls; - assert(isa(Callee) && + // PC Relative calls no longer guarantee that the callee is a Global + // Address Node. The callee could be an indirect tail call in which + // case the SDValue for the callee could be a load (to load the address + // of a function pointer) or it may be a register copy (to move the + // address of the callee from a function parameter into a virtual + // register). It may also be an ExternalSymbolSDNode (e.g. memcpy). 
+ assert((Subtarget.isUsingPCRelativeCalls() || + isa(Callee)) && "Callee should be an llvm::Function object."); - LLVM_DEBUG( - const GlobalValue *GV = - cast(Callee)->getGlobal(); - const unsigned Width = - 80 - strlen("TCO caller: ") - strlen(", callee linkage: 0, 0"); - dbgs() << "TCO caller: " - << left_justify(DAG.getMachineFunction().getName(), Width) - << ", callee linkage: " << GV->getVisibility() << ", " - << GV->getLinkage() << "\n"); + + LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName() + << "\nTCO callee: "); + LLVM_DEBUG(Callee.dump()); } } diff --git a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp --- a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -86,14 +86,22 @@ RefKind = MCSymbolRefExpr::VK_PPC_GOT_PCREL; const MachineInstr *MI = MO.getParent(); - - if (MI->getOpcode() == PPC::BL8_NOTOC) - RefKind = MCSymbolRefExpr::VK_PPC_NOTOC; - const MachineFunction *MF = MI->getMF(); const Module *M = MF->getFunction().getParent(); const PPCSubtarget *Subtarget = &(MF->getSubtarget()); const TargetMachine &TM = Printer.TM; + + unsigned MIOpcode = MI->getOpcode(); + assert((Subtarget->isUsingPCRelativeCalls() || MIOpcode != PPC::BL8_NOTOC) && + "BL8_NOTOC is only valid when using PC Relative Calls."); + if (Subtarget->isUsingPCRelativeCalls()) { + if (MIOpcode == PPC::TAILB || MIOpcode == PPC::TAILB8 || + MIOpcode == PPC::TCRETURNdi || MIOpcode == PPC::TCRETURNdi8 || + MIOpcode == PPC::BL8_NOTOC) { + RefKind = MCSymbolRefExpr::VK_PPC_NOTOC; + } + } + const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, RefKind, Ctx); // If -msecure-plt -fPIC, add 32768 to symbol. 
if (Subtarget->isSecurePlt() && TM.isPositionIndependent() && diff --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-simple.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-simple.ll --- a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-simple.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-simple.ll @@ -7,13 +7,11 @@ ; CHECK-S-LABEL: caller -; CHECK-S: bl callee@notoc -; CHECK-S: blr +; CHECK-S: b callee@notoc ; CHECK-O-LABEL: caller -; CHECK-O: bl +; CHECK-O: b ; CHECK-O-NEXT: R_PPC64_REL24_NOTOC callee -; CHECK-O: blr define dso_local signext i32 @caller() local_unnamed_addr { entry: %call = tail call signext i32 bitcast (i32 (...)* @callee to i32 ()*)() @@ -25,13 +23,11 @@ ; Some calls can be considered Extrnal Symbols. ; CHECK-S-LABEL: ExternalSymbol -; CHECK-S: bl memcpy@notoc -; CHECK-S: blr +; CHECK-S: b memcpy@notoc ; CHECK-O-LABEL: ExternalSymbol -; CHECK-O: bl +; CHECK-O: b ; CHECK-O-NEXT: R_PPC64_REL24_NOTOC memcpy -; CHECK-O: blr define dso_local void @ExternalSymbol(i8* nocapture %out, i8* nocapture readonly %in, i64 %num) local_unnamed_addr { entry: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %out, i8* align 1 %in, i64 %num, i1 false) diff --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll --- a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll @@ -193,19 +193,10 @@ ; CHECK-ALL-LABEL: TailCallLocal1: ; CHECK-S: .localentry TailCallLocal1 ; CHECK-S: # %bb.0: # %entry -; CHECK-S-NEXT: mflr r0 -; CHECK-S-NEXT: std r0, 16(r1) -; CHECK-S-NEXT: stdu r1, -32(r1) -; CHECK-S-NEXT: .cfi_def_cfa_offset 32 -; CHECK-S-NEXT: .cfi_offset lr, 16 -; CHECK-S-NEXT: plwz r4, globalVar@PCREL(0), 1 +; CHECK-S: plwz r4, globalVar@PCREL(0), 1 ; CHECK-S-NEXT: add r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 -; CHECK-S-NEXT: bl localCall@notoc -; CHECK-S-NEXT: addi r1, r1, 32 -; CHECK-S-NEXT: ld r0, 16(r1) -; 
CHECK-S-NEXT: mtlr r0 -; CHECK-S-NEXT: blr +; CHECK-S-NEXT: b localCall@notoc entry: %0 = load i32, i32* @globalVar, align 4 %add = add nsw i32 %0, %a @@ -217,20 +208,11 @@ ; CHECK-ALL-LABEL: TailCallLocal2: ; CHECK-S: .localentry TailCallLocal2 ; CHECK-S: # %bb.0: # %entry -; CHECK-S-NEXT: mflr r0 -; CHECK-S-NEXT: std r0, 16(r1) -; CHECK-S-NEXT: stdu r1, -32(r1) -; CHECK-S-NEXT: .cfi_def_cfa_offset 32 -; CHECK-S-NEXT: .cfi_offset lr, 16 -; CHECK-S-NEXT: pld r4, externGlobalVar@got@pcrel(0), 1 +; CHECK-S: pld r4, externGlobalVar@got@pcrel(0), 1 ; CHECK-S-NEXT: lwz r4, 0(r4) ; CHECK-S-NEXT: add r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 -; CHECK-S-NEXT: bl localCall@notoc -; CHECK-S-NEXT: addi r1, r1, 32 -; CHECK-S-NEXT: ld r0, 16(r1) -; CHECK-S-NEXT: mtlr r0 -; CHECK-S-NEXT: blr +; CHECK-S-NEXT: b localCall@notoc entry: %0 = load i32, i32* @externGlobalVar, align 4 %add = add nsw i32 %0, %a @@ -243,16 +225,7 @@ ; CHECK-S: .localentry TailCallLocalNoGlobal, 1 ; CHECK-P9: .localentry TailCallLocalNoGlobal, .Lfunc_lep9-.Lfunc_gep9 ; CHECK-ALL: # %bb.0: # %entry -; CHECK-S-NEXT: mflr r0 -; CHECK-S-NEXT: std r0, 16(r1) -; CHECK-S-NEXT: stdu r1, -32(r1) -; CHECK-S-NEXT: .cfi_def_cfa_offset 32 -; CHECK-S-NEXT: .cfi_offset lr, 16 -; CHECK-S-NEXT: bl localCall@notoc -; CHECK-S-NEXT: addi r1, r1, 32 -; CHECK-S-NEXT: ld r0, 16(r1) -; CHECK-S-NEXT: mtlr r0 -; CHECK-S-NEXT: blr +; CHECK-S: b localCall@notoc entry: %call = tail call signext i32 @localCall(i32 signext %a) ret i32 %call @@ -262,19 +235,10 @@ ; CHECK-ALL-LABEL: TailCallExtern1: ; CHECK-S: .localentry TailCallExtern1 ; CHECK-S: # %bb.0: # %entry -; CHECK-S-NEXT: mflr r0 -; CHECK-S-NEXT: std r0, 16(r1) -; CHECK-S-NEXT: stdu r1, -32(r1) -; CHECK-S-NEXT: .cfi_def_cfa_offset 32 -; CHECK-S-NEXT: .cfi_offset lr, 16 -; CHECK-S-NEXT: plwz r4, globalVar@PCREL(0), 1 +; CHECK-S: plwz r4, globalVar@PCREL(0), 1 ; CHECK-S-NEXT: add r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 -; CHECK-S-NEXT: bl externCall@notoc -; CHECK-S-NEXT: addi r1, 
r1, 32 -; CHECK-S-NEXT: ld r0, 16(r1) -; CHECK-S-NEXT: mtlr r0 -; CHECK-S-NEXT: blr +; CHECK-S-NEXT: b externCall@notoc entry: %0 = load i32, i32* @globalVar, align 4 %add = add nsw i32 %0, %a @@ -286,20 +250,11 @@ ; CHECK-ALL-LABEL: TailCallExtern2: ; CHECK-S: .localentry TailCallExtern2 ; CHECK-S: # %bb.0: # %entry -; CHECK-S-NEXT: mflr r0 -; CHECK-S-NEXT: std r0, 16(r1) -; CHECK-S-NEXT: stdu r1, -32(r1) -; CHECK-S-NEXT: .cfi_def_cfa_offset 32 -; CHECK-S-NEXT: .cfi_offset lr, 16 -; CHECK-S-NEXT: pld r4, externGlobalVar@got@pcrel(0), 1 +; CHECK-S: pld r4, externGlobalVar@got@pcrel(0), 1 ; CHECK-S-NEXT: lwz r4, 0(r4) ; CHECK-S-NEXT: add r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 -; CHECK-S-NEXT: bl externCall@notoc -; CHECK-S-NEXT: addi r1, r1, 32 -; CHECK-S-NEXT: ld r0, 16(r1) -; CHECK-S-NEXT: mtlr r0 -; CHECK-S-NEXT: blr +; CHECK-S-NEXT: b externCall@notoc entry: %0 = load i32, i32* @externGlobalVar, align 4 %add = add nsw i32 %0, %a @@ -311,16 +266,8 @@ ; CHECK-ALL-LABEL: TailCallExternNoGlobal: ; CHECK-S: .localentry TailCallExternNoGlobal, 1 ; CHECK-S-NEXT: # %bb.0: # %entry -; CHECK-S-NEXT: mflr r0 -; CHECK-S-NEXT: std r0, 16(r1) -; CHECK-S-NEXT: stdu r1, -32(r1) -; CHECK-S-NEXT: .cfi_def_cfa_offset 32 -; CHECK-S-NEXT: .cfi_offset lr, 16 -; CHECK-S-NEXT: bl externCall@notoc -; CHECK-S-NEXT: addi r1, r1, 32 -; CHECK-S-NEXT: ld r0, 16(r1) -; CHECK-S-NEXT: mtlr r0 -; CHECK-S-NEXT: blr +; CHECK-S-NEXT: b externCall@notoc +; CHECK-S-NEXT: #TC_RETURNd8 externCall@notoc entry: %call = tail call signext i32 @externCall(i32 signext %a) ret i32 %call @@ -443,18 +390,10 @@ define dso_local signext i32 @IndirectCallOnly(i32 signext %a, i32 (i32)* nocapture %call_param) local_unnamed_addr { ; CHECK-ALL-LABEL: IndirectCallOnly: ; CHECK-S: # %bb.0: # %entry -; CHECK-S-NEXT: mflr r0 -; CHECK-S-NEXT: std r0, 16(r1) -; CHECK-S-NEXT: stdu r1, -32(r1) -; CHECK-S-NEXT: .cfi_def_cfa_offset 32 -; CHECK-S-NEXT: .cfi_offset lr, 16 ; CHECK-S-NEXT: mtctr r4 ; CHECK-S-NEXT: mr r12, r4 -; 
CHECK-S-NEXT: bctrl -; CHECK-S-NEXT: addi r1, r1, 32 -; CHECK-S-NEXT: ld r0, 16(r1) -; CHECK-S-NEXT: mtlr r0 -; CHECK-S-NEXT: blr +; CHECK-S-NEXT: bctr +; CHECK-S-NEXT: #TC_RETURNr8 ctr entry: %call = tail call signext i32 %call_param(i32 signext %a) ret i32 %call diff --git a/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll b/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll --- a/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll @@ -215,20 +215,13 @@ define dso_local void @ReadFuncPtr() local_unnamed_addr { ; CHECK-LABEL: ReadFuncPtr: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: mflr r0 -; CHECK-NEXT: std r0, 16(r1) -; CHECK-NEXT: stdu r1, -32(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK: .localentry ReadFuncPtr, 1 +; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: pld r3, ptrfunc@got@pcrel(0), 1 ; CHECK-NEXT: ld r12, 0(r3) ; CHECK-NEXT: mtctr r12 -; CHECK-NEXT: bctrl -; CHECK-NEXT: addi r1, r1, 32 -; CHECK-NEXT: ld r0, 16(r1) -; CHECK-NEXT: mtlr r0 -; CHECK-NEXT: blr +; CHECK-NEXT: bctr +; CHECK-NEXT: #TC_RETURNr8 ctr 0 entry: %0 = load void ()*, void ()** bitcast (void (...)** @ptrfunc to void ()**), align 8 tail call void %0() diff --git a/llvm/test/CodeGen/PowerPC/pcrel-indirect-call.ll b/llvm/test/CodeGen/PowerPC/pcrel-indirect-call.ll --- a/llvm/test/CodeGen/PowerPC/pcrel-indirect-call.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-indirect-call.ll @@ -9,20 +9,10 @@ define dso_local void @IndirectCallExternFuncPtr(void ()* nocapture %ptrfunc) { ; CHECK-LABEL: IndirectCallExternFuncPtr: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: mflr r0 -; CHECK-NEXT: std r0, 16(r1) -; CHECK-NEXT: stdu r1, -32(r1) - -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: mtctr r3 ; CHECK-NEXT: mr r12, r3 -; CHECK-NEXT: bctrl - -; CHECK-NEXT: addi r1, r1, 32 -; CHECK-NEXT: ld r0, 16(r1) -; CHECK-NEXT: mtlr r0 -; CHECK-NEXT: blr +; CHECK-NEXT: bctr +; CHECK-NEXT: 
#TC_RETURNr8 ctr entry: tail call void %ptrfunc() ret void diff --git a/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll b/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pcrel-tail-calls.ll @@ -0,0 +1,237 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s + +; The tests check the behaviour of PC Relative tail calls. When using +; PC Relative we are able to do more tail calling than we have done in +; the past as we no longer need to restore the TOC pointer into R2 after +; most calls. + +@Func = external local_unnamed_addr global i32 (...)*, align 8 +@FuncLocal = common dso_local local_unnamed_addr global i32 (...)* null, align 8 + +; No calls in this function but we assign the function pointers. +define dso_local void @AssignFuncPtr() local_unnamed_addr { +; CHECK-LABEL: AssignFuncPtr: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, Func@got@pcrel(0), 1 +; CHECK-NEXT: pld r4, Function@got@pcrel(0), 1 +; CHECK-NEXT: std r4, 0(r3) +; CHECK-NEXT: pstd r4, FuncLocal@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + store i32 (...)* @Function, i32 (...)** @Func, align 8 + store i32 (...)* @Function, i32 (...)** @FuncLocal, align 8 + ret void +} + +declare signext i32 @Function(...) 
+ +define dso_local void @TailCallLocalFuncPtr() local_unnamed_addr { +; CHECK-LABEL: TailCallLocalFuncPtr: +; CHECK: .localentry TailCallLocalFuncPtr, 1 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: pld r12, FuncLocal@PCREL(0), 1 +; CHECK-NEXT: mtctr r12 +; CHECK-NEXT: bctr +; CHECK-NEXT: #TC_RETURNr8 ctr 0 +entry: + %0 = load i32 ()*, i32 ()** bitcast (i32 (...)** @FuncLocal to i32 ()**), align 8 + %call = tail call signext i32 %0() + ret void +} + +define dso_local void @TailCallExtrnFuncPtr() local_unnamed_addr { +; CHECK-LABEL: TailCallExtrnFuncPtr: +; CHECK: .localentry TailCallExtrnFuncPtr, 1 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: pld r3, Func@got@pcrel(0), 1 +; CHECK-NEXT: ld r12, 0(r3) +; CHECK-NEXT: mtctr r12 +; CHECK-NEXT: bctr +; CHECK-NEXT: #TC_RETURNr8 ctr 0 +entry: + %0 = load i32 ()*, i32 ()** bitcast (i32 (...)** @Func to i32 ()**), align 8 + %call = tail call signext i32 %0() + ret void +} + +define dso_local signext i32 @TailCallParamFuncPtr(i32 (...)* nocapture %passedfunc) local_unnamed_addr { +; CHECK-LABEL: TailCallParamFuncPtr: +; CHECK: .localentry TailCallParamFuncPtr, 1 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: mtctr r3 +; CHECK-NEXT: mr r12, r3 +; CHECK-NEXT: bctr +; CHECK-NEXT: #TC_RETURNr8 ctr 0 +entry: + %callee.knr.cast = bitcast i32 (...)* %passedfunc to i32 ()* + %call = tail call signext i32 %callee.knr.cast() + ret i32 %call +} + +define dso_local signext i32 @NoTailIndirectCall(i32 (...)* nocapture %passedfunc, i32 signext %a) local_unnamed_addr { +; CHECK-LABEL: NoTailIndirectCall: +; CHECK: .localentry NoTailIndirectCall, 1 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r30, -16 +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: mtctr r3 +; CHECK-NEXT: mr r12, r3 +; CHECK-NEXT: mr r30, r4 +; CHECK-NEXT: bctrl +; CHECK-NEXT: 
add r3, r3, r30 +; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: addi r1, r1, 48 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +entry: + %callee.knr.cast = bitcast i32 (...)* %passedfunc to i32 ()* + %call = tail call signext i32 %callee.knr.cast() + %add = add nsw i32 %call, %a + ret i32 %add +} + +define dso_local signext i32 @TailCallDirect() local_unnamed_addr { +; CHECK-LABEL: TailCallDirect: +; CHECK: .localentry TailCallDirect, 1 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: b Function@notoc +; CHECK-NEXT: #TC_RETURNd8 Function@notoc 0 +entry: + %call = tail call signext i32 bitcast (i32 (...)* @Function to i32 ()*)() + ret i32 %call +} + +define dso_local signext i32 @NoTailCallDirect(i32 signext %a) local_unnamed_addr { +; CHECK-LABEL: NoTailCallDirect: +; CHECK: .localentry NoTailCallDirect, 1 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r30, -16 +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: mr r30, r3 +; CHECK-NEXT: bl Function@notoc +; CHECK-NEXT: add r3, r3, r30 +; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: addi r1, r1, 48 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +entry: + %call = tail call signext i32 bitcast (i32 (...)* @Function to i32 ()*)() + %add = add nsw i32 %call, %a + ret i32 %add +} + +define dso_local signext i32 @TailCallDirectLocal() local_unnamed_addr { +; CHECK-LABEL: TailCallDirectLocal: +; CHECK: .localentry TailCallDirectLocal, 1 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: b LocalFunction@notoc +; CHECK-NEXT: #TC_RETURNd8 LocalFunction@notoc 0 +entry: + %call = tail call fastcc signext i32 @LocalFunction() + ret i32 %call +} + +define dso_local signext i32 @NoTailCallDirectLocal(i32 
signext %a) local_unnamed_addr { +; CHECK-LABEL: NoTailCallDirectLocal: +; CHECK: .localentry NoTailCallDirectLocal, 1 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r30, -16 +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: mr r30, r3 +; CHECK-NEXT: bl LocalFunction@notoc +; CHECK-NEXT: add r3, r3, r30 +; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: addi r1, r1, 48 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +entry: + %call = tail call fastcc signext i32 @LocalFunction() + %add = add nsw i32 %call, %a + ret i32 %add +} + +define dso_local signext i32 @TailCallAbs() local_unnamed_addr { +; CHECK-LABEL: TailCallAbs: +; CHECK: .localentry TailCallAbs, 1 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: li r3, 400 +; CHECK-NEXT: mtctr r3 +; CHECK-NEXT: li r12, 400 +; CHECK-NEXT: bctr +; CHECK-NEXT: #TC_RETURNr8 ctr 0 +entry: + %call = tail call signext i32 inttoptr (i64 400 to i32 ()*)() + ret i32 %call +} + +define dso_local signext i32 @NoTailCallAbs(i32 signext %a) local_unnamed_addr { +; CHECK-LABEL: NoTailCallAbs: +; CHECK: .localentry NoTailCallAbs, 1 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r30, -16 +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: mr r30, r3 +; CHECK-NEXT: li r3, 400 +; CHECK-NEXT: mtctr r3 +; CHECK-NEXT: li r12, 400 +; CHECK-NEXT: bctrl +; CHECK-NEXT: add r3, r3, r30 +; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: addi r1, r1, 48 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +entry: + %call = tail call signext i32 
inttoptr (i64 400 to i32 ()*)() + %add = add nsw i32 %call, %a + ret i32 %add +} + +; Function Attrs: noinline +; This function should be tail called and not inlined. +define internal fastcc signext i32 @LocalFunction() unnamed_addr #0 { +; CHECK-LABEL: LocalFunction: +; CHECK: .localentry LocalFunction, 1 +; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: li r3, 42 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: blr +entry: + %0 = tail call i32 asm "li $0, 42", "=&r"() + ret i32 %0 +} + +attributes #0 = { noinline } +