diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -4956,213 +4956,6 @@ return false; } -static unsigned -PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, - SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall, - bool isPatchPoint, bool hasNest, - SmallVectorImpl> &RegsToPass, - SmallVectorImpl &Ops, std::vector &NodeTys, - ImmutableCallSite CS, const PPCSubtarget &Subtarget) { - bool isPPC64 = Subtarget.isPPC64(); - bool isSVR4ABI = Subtarget.isSVR4ABI(); - bool is64BitELFv1ABI = isPPC64 && isSVR4ABI && !Subtarget.isELFv2ABI(); - bool isAIXABI = Subtarget.isAIXABI(); - - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); - NodeTys.push_back(MVT::Other); // Returns a chain - NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use. - - unsigned CallOpc = PPCISD::CALL; - - bool needIndirectCall = true; - if (!isSVR4ABI || !isPPC64) - if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) { - // If this is an absolute destination address, use the munged value. - Callee = SDValue(Dest, 0); - needIndirectCall = false; - } - - // PC-relative references to external symbols should go through $stub, unless - // we're building with the leopard linker or later, which automatically - // synthesizes these stubs. - const TargetMachine &TM = DAG.getTarget(); - const Module *Mod = DAG.getMachineFunction().getFunction().getParent(); - const GlobalValue *GV = nullptr; - if (auto *G = dyn_cast(Callee)) - GV = G->getGlobal(); - bool Local = TM.shouldAssumeDSOLocal(*Mod, GV); - bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64; - - // If the callee is a GlobalAddress/ExternalSymbol node (quite common, - // every direct call is) turn it into a TargetGlobalAddress / - // TargetExternalSymbol node so that legalize doesn't hack it. - if (isFunctionGlobalAddress(Callee)) { - GlobalAddressSDNode *G = cast(Callee); - - // A call to a TLS address is actually an indirect call to a - // thread-specific pointer. - unsigned OpFlags = 0; - if (UsePlt) - OpFlags = PPCII::MO_PLT; - - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, - Callee.getValueType(), 0, OpFlags); - needIndirectCall = false; - } - - if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { - unsigned char OpFlags = 0; - - if (UsePlt) - OpFlags = PPCII::MO_PLT; - - Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(), - OpFlags); - needIndirectCall = false; - } - - if (isPatchPoint) { - // We'll form an invalid direct call when lowering a patchpoint; the full - // sequence for an indirect call is complicated, and many of the - // instructions introduced might have side effects (and, thus, can't be - // removed later). The call itself will be removed as soon as the - // argument/return lowering is complete, so the fact that it has the wrong - // kind of operands should not really matter. - needIndirectCall = false; - } - - if (needIndirectCall) { - // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair - // to do the call, we can't use PPCISD::CALL. - SDValue MTCTROps[] = {Chain, Callee, InFlag}; - - if (is64BitELFv1ABI) { - // Function pointers in the 64-bit SVR4 ABI do not point to the function - // entry point, but to the function descriptor (the function entry point - // address is part of the function descriptor though). - // The function descriptor is a three doubleword structure with the - // following fields: function entry point, TOC base address and - // environment pointer. - // Thus for a call through a function pointer, the following actions need - // to be performed: - // 1. Save the TOC of the caller in the TOC save area of its stack - // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()). - // 2. Load the address of the function entry point from the function - // descriptor. - // 3. Load the TOC of the callee from the function descriptor into r2. - // 4. Load the environment pointer from the function descriptor into - // r11. - // 5. Branch to the function entry point address. - // 6. On return of the callee, the TOC of the caller needs to be - // restored (this is done in FinishCall()). - // - // The loads are scheduled at the beginning of the call sequence, and the - // register copies are flagged together to ensure that no other - // operations can be scheduled in between. E.g. without flagging the - // copies together, a TOC access in the caller could be scheduled between - // the assignment of the callee TOC and the branch to the callee, which - // results in the TOC access going through the TOC of the callee instead - // of going through the TOC of the caller, which leads to incorrect code. - - // Load the address of the function entry point from the function - // descriptor. - SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1); - if (LDChain.getValueType() == MVT::Glue) - LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2); - - auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors() - ? (MachineMemOperand::MODereferenceable | - MachineMemOperand::MOInvariant) - : MachineMemOperand::MONone; - - MachinePointerInfo MPI(CS ? CS.getCalledValue() : nullptr); - SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI, - /* Alignment = */ 8, MMOFlags); - - // Load environment pointer into r11. - SDValue PtrOff = DAG.getIntPtrConstant(16, dl); - SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff); - SDValue LoadEnvPtr = - DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16), - /* Alignment = */ 8, MMOFlags); - - SDValue TOCOff = DAG.getIntPtrConstant(8, dl); - SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff); - SDValue TOCPtr = - DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8), - /* Alignment = */ 8, MMOFlags); - - setUsesTOCBasePtr(DAG); - SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr, - InFlag); - Chain = TOCVal.getValue(0); - InFlag = TOCVal.getValue(1); - - // If the function call has an explicit 'nest' parameter, it takes the - // place of the environment pointer. - if (!hasNest) { - SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, - InFlag); - - Chain = EnvVal.getValue(0); - InFlag = EnvVal.getValue(1); - } - - MTCTROps[0] = Chain; - MTCTROps[1] = LoadFuncPtr; - MTCTROps[2] = InFlag; - } - - Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, - makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2)); - InFlag = Chain.getValue(1); - - NodeTys.clear(); - NodeTys.push_back(MVT::Other); - NodeTys.push_back(MVT::Glue); - Ops.push_back(Chain); - CallOpc = PPCISD::BCTRL; - Callee.setNode(nullptr); - // Add use of X11 (holding environment pointer) - if (is64BitELFv1ABI && !hasNest) - Ops.push_back(DAG.getRegister(PPC::X11, PtrVT)); - // Add CTR register as callee so a bctr can be emitted later. - if (isTailCall) - Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT)); - } - - // If this is a direct call, pass the chain and the callee. - if (Callee.getNode()) { - Ops.push_back(Chain); - Ops.push_back(Callee); - } - // If this is a tail call add stack pointer delta. - if (isTailCall) - Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32)); - - // Add argument registers to the end of the list so that they are known live - // into the call. - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) - Ops.push_back(DAG.getRegister(RegsToPass[i].first, - RegsToPass[i].second.getValueType())); - - // All calls, in the AIX ABI and 64-bit ELF ABIs, need the TOC register - // live into the call. - // We do need to reserve R2/X2 to appease the verifier for the PATCHPOINT. - if ((isSVR4ABI && isPPC64) || isAIXABI) { - setUsesTOCBasePtr(DAG); - - // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is - // no way to mark dependencies as implicit here. - // We will add the R2/X2 dependency in EmitInstrWithCustomInserter. - if (!isPatchPoint) - Ops.push_back(DAG.getRegister(isPPC64 ? PPC::X2 - : PPC::R2, PtrVT)); - } - - return CallOpc; -} - SDValue PPCTargetLowering::LowerCallResult( SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, @@ -5227,30 +5020,278 @@ return Chain; } -SDValue PPCTargetLowering::FinishCall( - CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg, - bool isPatchPoint, bool hasNest, SelectionDAG &DAG, - SmallVector, 8> &RegsToPass, SDValue InFlag, - SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff, - unsigned NumBytes, const SmallVectorImpl &Ins, - SmallVectorImpl &InVals, ImmutableCallSite CS) const { - std::vector NodeTys; - SmallVector Ops; - unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl, - SPDiff, isTailCall, isPatchPoint, hasNest, - RegsToPass, Ops, NodeTys, CS, Subtarget); +static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG, + const PPCSubtarget &Subtarget, bool isPatchPoint) { + // PatchPoint calls are not indirect. + if (isPatchPoint) + return false; + + if (isFunctionGlobalAddress(Callee) || dyn_cast(Callee)) + return false; + + // Darwin, and 32-bit ELF can use a BLA. The descriptor based ABIs can not + // becuase the immediate function pointer points to a descriptor instead of + // a function entry point. The ELFv2 ABI cannot use a BLA because the function + // pointer immediate points to the global entry point, while the BLA would + // need to jump to the local entry point (see rL211174). + if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() && + isBLACompatibleAddress(Callee, DAG)) + return false; + + return true; +} + +static unsigned getCallOpcode(bool isIndirectCall, bool isPatchPoint, + bool isTailCall, const Function &Caller, + const SDValue &Callee, + const PPCSubtarget &Subtarget, + const TargetMachine &TM) { + if (isTailCall) + return PPCISD::TC_RETURN; + + // This is a call through a function pointer. + if (isIndirectCall) { + // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer accross + // indirect calls. The save of the caller's TOC pointer to the stack will be + // inserted into the DAG as part of call lowering. The restore of the TOC + // pointer is modeled by using a pseudo instruction for the call opcode that + // represents the 2 instruction sequence of an indirect branch and link, + // immediately followed by a load of the TOC pointer from the the stack save + // slot into gpr2. + if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) + return PPCISD::BCTRL_LOAD_TOC; + + // An indirect call that does not need a TOC restore. + return PPCISD::BCTRL; + } + + // The ABIs that maintain a TOC pointer accross calls need to have a nop + // immediately following the call instruction if the caller and callee may + // have different TOC bases. At link time if the linker determines the calls + // may not share a TOC base, the call is redirected to a trampoline inserted + // by the linker. The trampoline will save the callers TOC pointer at an ABI + // designated offset in the linkage area (among other things) and the linker + // will rewrite the nop to be a load of the TOC pointer from the linkage area + // into gpr2. + if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) + return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL + : PPCISD::CALL_NOP; + + return PPCISD::CALL; +} +static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, + const SDLoc &dl, const PPCSubtarget &Subtarget) { + if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI()) + if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) + return SDValue(Dest, 0); + + // Returns true if the callee is local, and false otherwise. + auto isLocalCallee = [&]() { + const GlobalAddressSDNode *G = dyn_cast(Callee); + const Module *Mod = DAG.getMachineFunction().getFunction().getParent(); + + return DAG.getTarget().shouldAssumeDSOLocal(*Mod, + G ? G->getGlobal() : nullptr); + }; + + bool UsePlt = Subtarget.is32BitELFABI() && !isLocalCallee(); + + if (isFunctionGlobalAddress(Callee)) { + const GlobalAddressSDNode *G = cast(Callee); + if (!Subtarget.isAIXABI()) + return DAG.getTargetGlobalAddress(G->getGlobal(), dl, + Callee.getValueType(), 0, + UsePlt ? PPCII::MO_PLT : 0); + + // On AIX, direct function calls reference the symbol for the function's + // entry point, which is named by prepending a "." before the function's + // C-linkage name. + auto &Context = DAG.getMachineFunction().getMMI().getContext(); + MCSymbol *S = Context.getOrCreateSymbol(Twine(".") + + Twine(G->getGlobal()->getName())); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); + return DAG.getMCSymbol(S, PtrVT); + } + + if (ExternalSymbolSDNode *S = dyn_cast(Callee)) + return DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(), + UsePlt ? PPCII::MO_PLT : 0); + + // No transformation needed. + assert(Callee.getNode() && "What no callee?"); + return Callee; +} + +static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) { + assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START && + "Expected a CALLSEQ_STARTSDNode."); + + // The last operand is the chain, except when the node has glue. If the node + // has glue, then the last operand is the glue, and the chain is the second + // last operand. + SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1); + if (LastValue.getValueType() != MVT::Glue) + return LastValue; + + return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2); +} + +// Creates the node that moves a functions address into the count register +// to prepare for an indirect call instruction. +static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, + SDValue &Glue, SDValue &Chain, + const SDLoc &dl) { + SDValue MTCTROps[] = {Chain, Callee, Glue}; + EVT ReturnTypes[] = {MVT::Other, MVT::Glue}; + Chain = DAG.getNode(PPCISD::MTCTR, dl, makeArrayRef(ReturnTypes, 2), + makeArrayRef(MTCTROps, Glue.getNode() ? 3 : 2)); + // The glue is the second value produced. + Glue = Chain.getValue(1); +} + +static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, + SDValue &Glue, SDValue &Chain, + SDValue CallSeqStart, + ImmutableCallSite CS, const SDLoc &dl, + bool hasNest, + const PPCSubtarget &Subtarget) { + // Function pointers in the 64-bit SVR4 ABI do not point to the function + // entry point, but to the function descriptor (the function entry point + // address is part of the function descriptor though). + // The function descriptor is a three doubleword structure with the + // following fields: function entry point, TOC base address and + // environment pointer. + // Thus for a call through a function pointer, the following actions need + // to be performed: + // 1. Save the TOC of the caller in the TOC save area of its stack + // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()). + // 2. Load the address of the function entry point from the function + // descriptor. + // 3. Load the TOC of the callee from the function descriptor into r2. + // 4. Load the environment pointer from the function descriptor into + // r11. + // 5. Branch to the function entry point address. + // 6. On return of the callee, the TOC of the caller needs to be + // restored (this is done in FinishCall()). + // + // The loads are scheduled at the beginning of the call sequence, and the + // register copies are flagged together to ensure that no other + // operations can be scheduled in between. E.g. without flagging the + // copies together, a TOC access in the caller could be scheduled between + // the assignment of the callee TOC and the branch to the callee, which leads + // to incorrect code. + + // Start by loading the function address from the descriptor. + SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart); + auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors() + ? (MachineMemOperand::MODereferenceable | + MachineMemOperand::MOInvariant) + : MachineMemOperand::MONone; + + MachinePointerInfo MPI(CS ? CS.getCalledValue() : nullptr); + + // One load for the functions entry point address. + SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI, + /* Alignment = */ 8, MMOFlags); + + // One for loading the TOC anchor for the module that contains the called + // function. + SDValue TOCOff = DAG.getIntPtrConstant(8, dl); + SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff); + SDValue TOCPtr = + DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8), + /* Alignment = */ 8, MMOFlags); + + // One for loading the environment pointer. + SDValue PtrOff = DAG.getIntPtrConstant(16, dl); + SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff); + SDValue LoadEnvPtr = + DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16), + /* Alignment = */ 8, MMOFlags); + + // Then copy the newly loaded TOC anchor to the TOC pointer. + SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr, Glue); + Chain = TOCVal.getValue(0); + Glue = TOCVal.getValue(1); + + // If the function call has an explicit 'nest' parameter, it takes the + // place of the environment pointer. + if (!hasNest) { + SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, Glue); + Chain = EnvVal.getValue(0); + Glue = EnvVal.getValue(1); + } + + // The rest of the indirect call sequence is the same as the non-descriptor + // DAG. + prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl); +} + +static void +buildCallOperands(SmallVectorImpl &Ops, CallingConv::ID CallConv, + const SDLoc &dl, bool isTailCall, bool isVarArg, + bool isPatchPoint, bool hasNest, SelectionDAG &DAG, + SmallVector, 8> &RegsToPass, + SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, + const PPCSubtarget &Subtarget, bool isIndirect) { + const bool IsPPC64 = Subtarget.isPPC64(); + + // First operand is always the chain. + Ops.push_back(Chain); + + // If it's a direct call pass the callee as the second operand. + if (!isIndirect) + Ops.push_back(Callee); + else { + assert(!isPatchPoint && "Patch point call are not indirect."); + if (Subtarget.isAIXABI()) + report_fatal_error("Indirect call on AIX is not implemented."); + + // For 64-bit ELF we have saved the TOC pointer to the linkage area on the + // stack (this would have been done in `LowerCall_64SVR4`). The call + // instruction is a pseudo instruction that represents both the indirect + // call and a load that restores the TOC pointer from the linkage area. The + // operand for the TOC restore is an add of the TOC save offset to the stack + // pointer. That must be the second operand after the chain. + if (Subtarget.is64BitELFABI()) { + SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64); + unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); + SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); + SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff); + Ops.push_back(AddTOC); + } + + // Add the register used for the environment pointer. + if (Subtarget.usesFunctionDescriptors() && !hasNest) + Ops.push_back(DAG.getRegister(PPC::X11, MVT::i64)); + + // Add CTR register as callee so a bctr can be emitted later. + if (isTailCall) + Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, + IsPPC64 ? MVT::i64 : MVT::i32)); + } + + // If this is a tail call add stack pointer delta. + if (isTailCall) + Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32)); + + // Add argument registers to the end of the list so that they are known live + // into the call. + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) + Ops.push_back(DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + + // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is + // no way to mark dependencies as implicit here. + // We will add the R2/X2 dependency in EmitInstrWithCustomInserter. + if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) && !isPatchPoint) + Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::X2 : PPC::R2, + IsPPC64 ? MVT::i64 : MVT::i32)); // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls - if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64()) + if (isVarArg && Subtarget.is32BitELFABI()) Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32)); - // When performing tail call optimization the callee pops its arguments off - // the stack. Account for this here so these bytes can be pushed back on in - // PPCFrameLowering::eliminateCallFramePseudoInstr. - int BytesCalleePops = - (CallConv == CallingConv::Fast && - getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0; - // Add a register mask operand representing the call-preserved registers. const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); const uint32_t *Mask = @@ -5258,8 +5299,40 @@ assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); - if (InFlag.getNode()) - Ops.push_back(InFlag); + // If the glue is valid, it is the last operand. + if (Glue.getNode()) + Ops.push_back(Glue); +} + +SDValue PPCTargetLowering::FinishCall( + CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg, + bool isPatchPoint, bool hasNest, SelectionDAG &DAG, + SmallVector, 8> &RegsToPass, SDValue Glue, + SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff, + unsigned NumBytes, const SmallVectorImpl &Ins, + SmallVectorImpl &InVals, ImmutableCallSite CS) const { + + if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) + setUsesTOCBasePtr(DAG); + + const bool isIndirect = isIndirectCall(Callee, DAG, Subtarget, isPatchPoint); + unsigned CallOpc = getCallOpcode(isIndirect, isPatchPoint, isTailCall, + DAG.getMachineFunction().getFunction(), + Callee, Subtarget, DAG.getTarget()); + + if (!isIndirect) + Callee = transformCallee(Callee, DAG, dl, Subtarget); + else if (Subtarget.usesFunctionDescriptors()) + prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CS, + dl, hasNest, Subtarget); + else + prepareIndirectCall(DAG, Callee, Glue, Chain, dl); + + // Build the operand list for the call instruction. + SmallVector Ops; + buildCallOperands(Ops, CallConv, dl, isTailCall, isVarArg, isPatchPoint, + hasNest, DAG, RegsToPass, Glue, Chain, Callee, SPDiff, + Subtarget, isIndirect); // Emit tail call. if (isTailCall) { @@ -5268,81 +5341,32 @@ Callee.getOpcode() == ISD::TargetExternalSymbol || Callee.getOpcode() == ISD::TargetGlobalAddress || isa(Callee)) && - "Expecting an global address, external symbol, absolute value or register"); - + "Expecting an global address, external symbol, absolute value or " + "register"); + assert(CallOpc == PPCISD::TC_RETURN && + "Unexpected call opcode for a tail call."); DAG.getMachineFunction().getFrameInfo().setHasTailCall(); - return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops); + return DAG.getNode(CallOpc, dl, MVT::Other, Ops); } - // Add a NOP immediately after the branch instruction when using the 64-bit - // SVR4 or the AIX ABI. - // At link time, if caller and callee are in a different module and - // thus have a different TOC, the call will be replaced with a call to a stub - // function which saves the current TOC, loads the TOC of the callee and - // branches to the callee. The NOP will be replaced with a load instruction - // which restores the TOC of the caller from the TOC save slot of the current - // stack frame. If caller and callee belong to the same module (and have the - // same TOC), the NOP will remain unchanged, or become some other NOP. + std::array ReturnTypes = {MVT::Other, MVT::Glue}; + Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops); + Glue = Chain.getValue(1); - MachineFunction &MF = DAG.getMachineFunction(); - EVT PtrVT = getPointerTy(DAG.getDataLayout()); - if (!isTailCall && !isPatchPoint && - ((Subtarget.isSVR4ABI() && Subtarget.isPPC64()) || - Subtarget.isAIXABI())) { - if (CallOpc == PPCISD::BCTRL) { - if (Subtarget.isAIXABI()) - report_fatal_error("Indirect call on AIX is not implemented."); - - // This is a call through a function pointer. - // Restore the caller TOC from the save area into R2. - // See PrepareCall() for more information about calls through function - // pointers in the 64-bit SVR4 ABI. - // We are using a target-specific load with r2 hard coded, because the - // result of a target-independent load would never go directly into r2, - // since r2 is a reserved register (which prevents the register allocator - // from allocating it), resulting in an additional register being - // allocated and an unnecessary move instruction being generated. - CallOpc = PPCISD::BCTRL_LOAD_TOC; - - SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT); - unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); - SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); - SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff); - - // The address needs to go after the chain input but before the flag (or - // any other variadic arguments). - Ops.insert(std::next(Ops.begin()), AddTOC); - } else if (CallOpc == PPCISD::CALL && - !callsShareTOCBase(&MF.getFunction(), Callee, DAG.getTarget())) { - // Otherwise insert NOP for non-local calls. - CallOpc = PPCISD::CALL_NOP; - } - } - - if (Subtarget.isAIXABI() && isFunctionGlobalAddress(Callee)) { - // On AIX, direct function calls reference the symbol for the function's - // entry point, which is named by inserting a "." before the function's - // C-linkage name. - GlobalAddressSDNode *G = cast(Callee); - auto &Context = DAG.getMachineFunction().getMMI().getContext(); - MCSymbol *S = Context.getOrCreateSymbol(Twine(".") + - Twine(G->getGlobal()->getName())); - Callee = DAG.getMCSymbol(S, PtrVT); - // Replace the GlobalAddressSDNode Callee with the MCSymbolSDNode. - Ops[1] = Callee; - } - - Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); - InFlag = Chain.getValue(1); + // When performing tail call optimization the callee pops its arguments off + // the stack. Account for this here so these bytes can be pushed back on in + // PPCFrameLowering::eliminateCallFramePseudoInstr. + int BytesCalleePops = (CallConv == CallingConv::Fast && + getTargetMachine().Options.GuaranteedTailCallOpt) + ? NumBytes + : 0; Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), DAG.getIntPtrConstant(BytesCalleePops, dl, true), - InFlag, dl); - if (!Ins.empty()) - InFlag = Chain.getValue(1); + Glue, dl); + Glue = Chain.getValue(1); - return LowerCallResult(Chain, InFlag, CallConv, isVarArg, - Ins, dl, DAG, InVals); + return LowerCallResult(Chain, Glue, CallConv, isVarArg, Ins, dl, DAG, InVals); } SDValue @@ -6295,8 +6319,8 @@ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); // Check if this is an indirect call (MTCTR/BCTRL). - // See PrepareCall() for more information about calls through function - // pointers in the 64-bit SVR4 ABI. + // See prepareDescriptorIndirectCall and buildCallOperands for more + // information about calls through function pointers in the 64-bit SVR4 ABI. if (!isTailCall && !isPatchPoint && !isFunctionGlobalAddress(Callee) && !isa(Callee)) { diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h --- a/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -347,6 +347,13 @@ /// True if the GV will be accessed via an indirect symbol. bool isGVIndirectSymbol(const GlobalValue *GV) const; + /// True if the ABI is descriptor based. + bool usesFunctionDescriptors() const { + // Both 32-bit and 64-bit AIX are descriptor based. For ELF only the 64-bit + // v1 ABI uses descriptors. + return isAIXABI() || (is64BitELFABI() && !isELFv2ABI()); + } + bool isXRaySupported() const override { return IsPPC64 && IsLittleEndian; } }; } // End llvm namespace