Index: llvm/trunk/lib/Target/PowerPC/P9InstrResources.td =================================================================== --- llvm/trunk/lib/Target/PowerPC/P9InstrResources.td +++ llvm/trunk/lib/Target/PowerPC/P9InstrResources.td @@ -1288,7 +1288,7 @@ (instregex "BCCTR(L)?(8)?(n)?$"), (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"), (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"), - (instregex "BL(_TLS)?$"), + (instregex "BL(_TLS|_NOP)?$"), (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?$"), (instregex "BLA(8|8_NOP)?$"), (instregex "BLR(8|L)?$"), Index: llvm/trunk/lib/Target/PowerPC/PPCCallingConv.td =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCCallingConv.td +++ llvm/trunk/lib/Target/PowerPC/PPCCallingConv.td @@ -306,6 +306,13 @@ def CSR_SVR432_SPE : CalleeSavedRegs<(add CSR_SVR432_COMM, CSR_SPE)>; +def CSR_AIX32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20, + R21, R22, R23, R24, R25, R26, R27, R28, + R29, R30, R31, F14, F15, F16, F17, F18, + F19, F20, F21, F22, F23, F24, F25, F26, + F27, F28, F29, F30, F31, CR2, CR3, CR4 + )>; + def CSR_Darwin64 : CalleeSavedRegs<(add X13, X14, X15, X16, X17, X18, X19, X20, X21, X22, X23, X24, X25, X26, X27, X28, X29, X30, X31, F14, F15, F16, F17, F18, @@ -322,6 +329,13 @@ F27, F28, F29, F30, F31, CR2, CR3, CR4 )>; +def CSR_AIX64 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20, + X21, X22, X23, X24, X25, X26, X27, X28, + X29, X30, X31, F14, F15, F16, F17, F18, + F19, F20, F21, F22, F23, F24, F25, F26, + F27, F28, F29, F30, F31, CR2, CR3, CR4 + )>; + // CSRs that are handled by prologue, epilogue. 
def CSR_SRV464_TLS_PE : CalleeSavedRegs<(add)>; Index: llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.cpp =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.cpp +++ llvm/trunk/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -71,10 +71,10 @@ } static unsigned computeLinkageSize(const PPCSubtarget &STI) { - if (STI.isDarwinABI() || STI.isPPC64()) + if ((STI.isDarwinABI() || STI.isAIXABI()) || STI.isPPC64()) return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4); - // SVR4 ABI: + // 32-bit SVR4 ABI: return 8; } Index: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h +++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h @@ -160,7 +160,7 @@ /// CALL - A direct function call. /// CALL_NOP is a call with the special NOP which follows 64-bit - /// SVR4 calls. + /// SVR4 calls and 32-bit/64-bit AIX calls. CALL, CALL_NOP, /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a @@ -1120,6 +1120,15 @@ const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, ImmutableCallSite CS) const; + SDValue LowerCall_AIX(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, bool isPatchPoint, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + const SDLoc &dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals, + ImmutableCallSite CS) const; SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; Index: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp +++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp @@ -5160,18 +5160,23 @@ } // Add a NOP immediately after the branch instruction when using the 64-bit - // SVR4 
ABI. At link time, if caller and callee are in a different module and + // SVR4 or the AIX ABI. + // At link time, if caller and callee are in a different module and // thus have a different TOC, the call will be replaced with a call to a stub // function which saves the current TOC, loads the TOC of the callee and // branches to the callee. The NOP will be replaced with a load instruction // which restores the TOC of the caller from the TOC save slot of the current // stack frame. If caller and callee belong to the same module (and have the - // same TOC), the NOP will remain unchanged. + // same TOC), the NOP will remain unchanged, or become some other NOP. MachineFunction &MF = DAG.getMachineFunction(); - if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64() && - !isPatchPoint) { + if (!isTailCall && !isPatchPoint && + ((Subtarget.isSVR4ABI() && Subtarget.isPPC64()) || + Subtarget.isAIXABI())) { if (CallOpc == PPCISD::BCTRL) { + if (Subtarget.isAIXABI()) + report_fatal_error("Indirect call on AIX is not implemented."); + // This is a call through a function pointer. // Restore the caller TOC from the save area into R2. 
// See PrepareCall() for more information about calls through function @@ -5268,16 +5273,20 @@ !isTailCall) Callee = LowerGlobalAddress(Callee, DAG); - if (Subtarget.isSVR4ABI()) { - if (Subtarget.isPPC64()) - return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg, - isTailCall, isPatchPoint, Outs, OutVals, Ins, - dl, DAG, InVals, CS); - else - return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg, - isTailCall, isPatchPoint, Outs, OutVals, Ins, - dl, DAG, InVals, CS); - } + if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) + return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg, + isTailCall, isPatchPoint, Outs, OutVals, Ins, + dl, DAG, InVals, CS); + + if (Subtarget.isSVR4ABI()) + return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg, + isTailCall, isPatchPoint, Outs, OutVals, Ins, + dl, DAG, InVals, CS); + + if (Subtarget.isAIXABI()) + return LowerCall_AIX(Chain, Callee, CallConv, isVarArg, + isTailCall, isPatchPoint, Outs, OutVals, Ins, + dl, DAG, InVals, CS); return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg, isTailCall, isPatchPoint, Outs, OutVals, Ins, @@ -6567,6 +6576,67 @@ NumBytes, Ins, InVals, CS); } + +SDValue PPCTargetLowering::LowerCall_AIX( + SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, bool isPatchPoint, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, + SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, + ImmutableCallSite CS) const { + + assert((CallConv == CallingConv::C || CallConv == CallingConv::Fast) && + "Unimplemented calling convention!"); + if (isVarArg || isPatchPoint) + report_fatal_error("This call type is unimplemented on AIX."); + + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + bool isPPC64 = PtrVT == MVT::i64; + unsigned PtrByteSize = isPPC64 ? 
8 : 4; + unsigned NumOps = Outs.size(); + + if (NumOps != 0) + report_fatal_error("Call lowering with parameters is not implemented " + "on AIX yet."); + + // Count how many bytes are to be pushed on the stack, including the linkage + // area and parameter list area. + // On XCOFF, we start with 24/48, which is reserved space for + // [SP][CR][LR][2 x reserved][TOC]. + unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); + + // The prolog code of the callee may store up to 8 GPR argument registers to + // the stack, allowing va_start to index over them in memory if the callee + // is variadic. + // Because we cannot tell if this is needed on the caller side, we have to + // conservatively assume that it is needed. As such, make sure we have at + // least enough stack space for the caller to store the 8 GPRs. + unsigned NumBytes = LinkageSize + 8 * PtrByteSize; + + // Adjust the stack pointer for the new arguments... + // These operations are automatically eliminated by the prolog/epilog + // inserter pass. 
+ Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl); + SDValue CallSeqStart = Chain; + + if (!isFunctionGlobalAddress(Callee) && + !isa<ExternalSymbolSDNode>(Callee)) + report_fatal_error("Handling of indirect call is unimplemented!"); + + SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; + SDValue InFlag; + + if (isTailCall) + report_fatal_error("Handling of tail call is unimplemented!"); + int SPDiff = 0; + + return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, + /* unused except on PPC64 ELFv1 */ false, DAG, + RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, + NumBytes, Ins, InVals, CS); +} + bool PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, Index: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td +++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td @@ -1469,6 +1469,9 @@ def BCLn : BForm_4<16, 4, 0, 1, (outs), (ins crbitrc:$bi, condbrtarget:$dst), "bcl 4, $bi, $dst">; + def BL_NOP : IForm_and_DForm_4_zero<18, 0, 1, 24, + (outs), (ins calltarget:$func), + "bl $func\n\tnop", IIC_BrB, []>; } } let Uses = [CTR, RM] in { @@ -3029,6 +3032,9 @@ // Calls def : Pat<(PPCcall (i32 tglobaladdr:$dst)), (BL tglobaladdr:$dst)>; +def : Pat<(PPCcall_nop (i32 tglobaladdr:$dst)), + (BL_NOP tglobaladdr:$dst)>; + def : Pat<(PPCcall (i32 texternalsym:$dst)), (BL texternalsym:$dst)>; Index: llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -228,6 +228,10 @@ : CSR_Darwin64_RegMask) : (Subtarget.hasAltivec() ? CSR_Darwin32_Altivec_RegMask : CSR_Darwin32_RegMask); + if (Subtarget.isAIXABI()) { + assert(!Subtarget.hasAltivec() && "Altivec is not implemented on AIX yet."); + return TM.isPPC64() ? 
CSR_AIX64_RegMask : CSR_AIX32_RegMask; + } if (CC == CallingConv::Cold) { return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR64_ColdCC_Altivec_RegMask Index: llvm/trunk/lib/Target/PowerPC/PPCSubtarget.h =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCSubtarget.h +++ llvm/trunk/lib/Target/PowerPC/PPCSubtarget.h @@ -314,7 +314,8 @@ bool isTargetLinux() const { return TargetTriple.isOSLinux(); } bool isDarwinABI() const { return isTargetMachO() || isDarwin(); } - bool isSVR4ABI() const { return !isDarwinABI(); } + bool isAIXABI() const { return TargetTriple.isOSAIX(); } + bool isSVR4ABI() const { return !isDarwinABI() && !isAIXABI(); } bool isELFv2ABI() const; /// Originally, this function return hasISEL(). Now we always enable it, Index: llvm/trunk/lib/Target/TargetMachine.cpp =================================================================== --- llvm/trunk/lib/Target/TargetMachine.cpp +++ llvm/trunk/lib/Target/TargetMachine.cpp @@ -173,6 +173,11 @@ return GV && GV->isStrongDefinitionForLinker(); } + // Due to the AIX linkage model, any global with default visibility is + // considered non-local. + if (TT.isOSBinFormatXCOFF()) + return false; + assert(TT.isOSBinFormatELF() || TT.isOSBinFormatWasm()); assert(RM != Reloc::DynamicNoPIC); Index: llvm/trunk/test/CodeGen/PowerPC/test_call_aix.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/test_call_aix.ll +++ llvm/trunk/test/CodeGen/PowerPC/test_call_aix.ll @@ -0,0 +1,40 @@ +; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp < %s | \ +; RUN: FileCheck --check-prefix=32BIT %s + +; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp < %s | \ +; RUN: FileCheck --check-prefix=64BIT %s + +declare void @foo(...) 
+ +define void @test_call() { +entry: +; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 +; 32BIT: BL_NOP @foo, csr_aix32, implicit-def dead $lr, implicit $rm, implicit-def $r1 +; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + +; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT: BL8_NOP @foo, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit-def $r1 +; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + + call void bitcast (void (...)* @foo to void ()*)() + ret void +} + +define hidden void @foo_local() { +entry: + ret void +} + +define void @test_local_call() { +entry: +; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 +; 32BIT: BL @foo_local, csr_aix32, implicit-def dead $lr, implicit $rm, implicit-def $r1 +; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 + +; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 +; 64BIT: BL8 @foo_local, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit-def $r1 +; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + + call void @foo_local() + ret void +}