diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td
--- a/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -1318,6 +1318,7 @@
     BCLalways,
     BCLn,
     BCTRL8_LDinto_toc,
+    BCTRL_LWZinto_toc,
     BCn,
     CTRL_DEP
 )>;
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -2441,10 +2441,6 @@
 }
 
 unsigned PPCFrameLowering::getTOCSaveOffset() const {
-  if (Subtarget.isAIXABI())
-    // TOC save/restore is normally handled by the linker.
-    // Indirect calls should hit this limitation.
-    report_fatal_error("TOC save is not implemented on AIX yet.");
   return TOCSaveOffset;
 }
 
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -174,7 +174,8 @@
       BCTRL,
 
       /// CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl
-      /// instruction and the TOC reload required on SVR4 PPC64.
+      /// instruction and the TOC reload required on 64-bit ELF, 32-bit AIX
+      /// and 64-bit AIX.
       BCTRL_LOAD_TOC,
 
       /// Return with a flag operand, matched by 'blr'
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3153,11 +3153,17 @@
 
 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
                                                   SelectionDAG &DAG) const {
+  if (Subtarget.isAIXABI())
+    report_fatal_error("ADJUST_TRAMPOLINE operation not supported on AIX.");
+
   return Op.getOperand(0);
 }
 
 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
                                                 SelectionDAG &DAG) const {
+  if (Subtarget.isAIXABI())
+    report_fatal_error("INIT_TRAMPOLINE operation not supported on AIX.");
+
   SDValue Chain = Op.getOperand(0);
   SDValue Trmp = Op.getOperand(1); // trampoline
   SDValue FPtr = Op.getOperand(2); // nested function
@@ -5208,34 +5214,48 @@
 
     MachinePointerInfo MPI(CS ? CS.getCalledValue() : nullptr);
 
+    // Registers used in building the DAG.
+    const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
+    const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
+
+    // Offsets of descriptor members.
+    const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
+    const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
+
+    const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
+    const unsigned Alignment = Subtarget.isPPC64() ? 8 : 4;
+
     // One load for the functions entry point address.
-    SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
-                                      /* Alignment = */ 8, MMOFlags);
+    SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
+                                      Alignment, MMOFlags);
 
     // One for loading the TOC anchor for the module that contains the called
     // function.
-    SDValue TOCOff = DAG.getIntPtrConstant(8, dl);
-    SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
+    SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
+    SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
     SDValue TOCPtr =
-        DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8),
-                    /* Alignment = */ 8, MMOFlags);
+        DAG.getLoad(RegVT, dl, LDChain, AddTOC,
+                    MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
 
     // One for loading the environment pointer.
-    SDValue PtrOff = DAG.getIntPtrConstant(16, dl);
-    SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
+    SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
+    SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
     SDValue LoadEnvPtr =
-        DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16),
-                    /* Alignment = */ 8, MMOFlags);
+        DAG.getLoad(RegVT, dl, LDChain, AddPtr,
+                    MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
+
 
     // Then copy the newly loaded TOC anchor to the TOC pointer.
-    SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr, Glue);
+    SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
     Chain = TOCVal.getValue(0);
     Glue = TOCVal.getValue(1);
 
     // If the function call has an explicit 'nest' parameter, it takes the
     // place of the environment pointer.
+    assert((!hasNest || !Subtarget.isAIXABI()) &&
+           "Nest parameter not supported on AIX");
     if (!hasNest) {
-      SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, Glue);
+      SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
       Chain = EnvVal.getValue(0);
       Glue = EnvVal.getValue(1);
     }
@@ -5264,27 +5284,29 @@
     Ops.push_back(Callee);
   else {
     assert(!isPatchPoint && "Patch point call are not indirect.");
-    if (Subtarget.isAIXABI())
-      report_fatal_error("Indirect call on AIX is not implemented.");
-
-    // For 64-bit ELF we have saved the TOC pointer to the linkage area on the
-    // stack (this would have been done in `LowerCall_64SVR4`). The call
-    // instruction is a pseudo instruction that represents both the indirect
-    // branch and a load that restores the TOC pointer from the linkage area.
-    // The operand for the TOC restore is an add of the TOC save offset to the
-    // stack pointer. This must be the second operand: after the chain input but
-    // before any other variadic arguments.
-    if (Subtarget.is64BitELFABI()) {
-      SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
+
+    // For the TOC based ABIs we have saved the TOC pointer to the linkage area
+    // on the stack (this would have been done in `LowerCall_64SVR4` or
+    // `LowerCall_AIX`). The call instruction is a pseudo instruction that
+    // represents both the indirect branch and a load that restores the TOC
+    // pointer from the linkage area. The operand for the TOC restore is an add
+    // of the TOC save offset to the stack pointer. This must be the second
+    // operand: after the chain input but before any other variadic arguments.
+    if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
+      const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
+
+      SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
       unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
       SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
-      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
+      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
       Ops.push_back(AddTOC);
     }
 
     // Add the register used for the environment pointer.
     if (Subtarget.usesFunctionDescriptors() && !hasNest)
-      Ops.push_back(DAG.getRegister(PPC::X11, MVT::i64));
+      Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
+                                    RegVT));
+
 
     // Add CTR register as callee so a bctr can be emitted later.
     if (isTailCall)
@@ -5305,7 +5327,7 @@
   // no way to mark dependencies as implicit here.
   // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
   if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) && !isPatchPoint)
-    Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::X2 : PPC::R2, RegVT));
+    Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
 
   // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
   if (isVarArg && Subtarget.is32BitELFABI())
@@ -6961,9 +6983,6 @@
   if (isVarArg || isPatchPoint)
     report_fatal_error("This call type is unimplemented on AIX.");
 
-  if (!isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee))
-    report_fatal_error("Handling of indirect call is unimplemented!");
-
   const PPCSubtarget& Subtarget =
       static_cast<const PPCSubtarget&>(DAG.getSubtarget());
   if (Subtarget.hasQPX())
@@ -7022,6 +7041,26 @@
                          "unimplemented!");
   }
 
+  // For indirect calls, we need to save the TOC base to the stack for
+  // restoration after the call.
+  if (!isTailCall && !isPatchPoint &&
+      !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee)) {
+    const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
+    const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
+    const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
+    const unsigned TOCSaveOffset =
+        Subtarget.getFrameLowering()->getTOCSaveOffset();
+
+    setUsesTOCBasePtr(DAG);
+    SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
+    SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
+    SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
+    SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+    Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
+                         MachinePointerInfo::getStack(
+                             DAG.getMachineFunction(), TOCSaveOffset));
+  }
+
   // Build a sequence of copy-to-reg nodes chained together with token chain
   // and flag operands which copy the outgoing args into the appropriate regs.
   SDValue InFlag;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/llvm/lib/Target/PowerPC/PPCInstrFormats.td
--- a/llvm/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@@ -1529,6 +1529,29 @@
   let BH = 0;
 }
 
+class XLForm_2_ext_and_DForm_1<bits<6> opcode1, bits<10>xo1, bits<5> bo,
+                               bits<5> bi, bits<2> bh, bit lk, bits<6> opcode2,
+                               dag OOL, dag IOL, string asmstr,
+                               InstrItinClass itin, list<dag> pattern>
+  : I2<opcode1, opcode2, OOL, IOL, asmstr, itin> {
+
+  bits<5> RST;
+  bits<19> DS_RA;
+
+  let Pattern = pattern;
+
+  let Inst{6-10} = bo;
+  let Inst{11-15} = bi;
+  let Inst{16-18} = 0; // unused.
+  let Inst{19-20} = bh;
+  let Inst{21-30} = xo1;
+  let Inst{31} = lk;
+
+  let Inst{38-42} = RST;
+  let Inst{43-47} = DS_RA{18-14}; // Register #
+  let Inst{48-61} = DS_RA{13-0}; // Displacement.
+}
+
 // 1.7.8 XFX-Form
 class XFXForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
                 InstrItinClass itin>
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1645,6 +1645,15 @@
                  "#TC_RETURNr $dst $offset",
                  []>;
 
+let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
+    Defs = [LR, R2], Uses = [CTR, RM], RST = 2 in {
+  def BCTRL_LWZinto_toc:
+    XLForm_2_ext_and_DForm_1<19, 528, 20, 0, 0, 1, 32, (outs),
+     (ins memrix:$src), "bctrl\n\tlwz 2, $src", IIC_BrB,
+     [(PPCbctrl_load_toc iaddrX4:$src)]>, Requires<[In32BitMode]>;
+
+}
+
 
 let isCodeGenOnly = 1 in {
 
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -358,6 +358,34 @@
     return isAIXABI() || (is64BitELFABI() && !isELFv2ABI());
   }
 
+  unsigned descriptorTOCAnchorOffset() const {
+    assert(usesFunctionDescriptors() &&
+           "should only be called when target uses descriptors.");
+    return IsPPC64 ? 8 : 4;
+  }
+
+  unsigned descriptorEnvironmentPointerOffset() const {
+    assert(usesFunctionDescriptors() &&
+           "should only be called when target uses descriptors.");
+    return IsPPC64 ? 16 : 8;
+  }
+
+  MCRegister getEnvironmentPointerRegister() const {
+    assert(usesFunctionDescriptors() &&
+           "should only be called when target uses descriptors.");
+    return IsPPC64 ? PPC::X11 : PPC::R11;
+  }
+
+  MCRegister getTOCPointerRegister() const {
+    assert((is64BitELFABI() || isAIXABI()) &&
+           "should only be called when target is TOC based ABI.");
+    return IsPPC64 ? PPC::X2 : PPC::R2;
+  }
+
+  MCRegister getStackPointerRegister() const {
+    return IsPPC64 ? PPC::X1 : PPC::R1;
+  }
+
   bool isXRaySupported() const override { return IsPPC64 && IsLittleEndian; }
 };
 } // End llvm namespace