Index: lib/Target/AArch64/AArch64CallingConvention.td
===================================================================
--- lib/Target/AArch64/AArch64CallingConvention.td
+++ lib/Target/AArch64/AArch64CallingConvention.td
@@ -288,6 +288,14 @@
                            (sub (sequence "X%u", 1, 28), X15, X16, X17, X18),
                            (sequence "D%u", 0, 31))>;
 
+// CSRs that are handled by the prologue and epilogue.
+def CSR_AArch64_CXX_TLS_Darwin_PE
+    : CalleeSavedRegs<(add LR, FP)>;
+
+// CSRs that are handled explicitly via copies.
+def CSR_AArch64_CXX_TLS_Darwin_ViaCopy
+    : CalleeSavedRegs<(sub CSR_AArch64_CXX_TLS_Darwin, LR, FP)>;
+
 // The ELF stub used for TLS-descriptor access saves every feasible
 // register. Only X0 and LR are clobbered.
 def CSR_AArch64_TLS_ELF
Index: lib/Target/AArch64/AArch64FastISel.cpp
===================================================================
--- lib/Target/AArch64/AArch64FastISel.cpp
+++ lib/Target/AArch64/AArch64FastISel.cpp
@@ -3646,6 +3646,12 @@
   if (F.isVarArg())
     return false;
 
+  // Leave CXX_FAST_TLS returns to SelectionDAG when split CSR is supported:
+  // only its return lowering appends the via-copy CSRs to the return.
+  if (F.getCallingConv() == CallingConv::CXX_FAST_TLS &&
+      TLI.supportSplitCSR())
+    return false;
+
   // Build a list of return value registers.
   SmallVector<unsigned, 4> RetRegs;
 
Index: lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.h
+++ lib/Target/AArch64/AArch64ISelLowering.h
@@ -388,6 +388,14 @@
   bool isCheapToSpeculateCtlz() const override {
     return true;
   }
+  /// AArch64 supports split handling of callee-saved registers.
+  bool supportSplitCSR() const override {
+    return true;
+  }
+  /// Flag the function as split-CSR and, when \p Exit is non-null, insert
+  /// the entry/exit copies for the registers that are preserved via copies.
+  void handleSplitCSR(MachineBasicBlock *Entry,
+                      MachineBasicBlock *Exit) const override;
 
 private:
   bool isExtFreeImpl(const Instruction *Ext) const override;
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3268,6 +3268,22 @@
     Flag = Chain.getValue(1);
     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
   }
+  // For CXX_FAST_TLS with split CSR, append every callee-saved register that
+  // is preserved via copies to the return operands (RetOps).
+  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
+  const MCPhysReg *I =
+      TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
+  if (CallConv == CallingConv::CXX_FAST_TLS && I) {
+    for (; *I; ++I) {
+      if (AArch64::GPR64RegClass.contains(*I))
+        RetOps.push_back(
+            DAG.getRegister(*I, getPointerTy(DAG.getDataLayout())));
+      else if (AArch64::FPR64RegClass.contains(*I))
+        RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
+      else
+        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+    }
+  }
 
   RetOps[0] = Chain; // Update chain.
 
@@ -10009,3 +10025,43 @@
       IRB.CreateConstGEP1_32(IRB.CreateCall(ThreadPointerFunc), TlsOffset),
       Type::getInt8PtrTy(IRB.getContext())->getPointerTo(0));
 }
+
+/// Mark the function as using split CSR and, when \p Exit is non-null, copy
+/// each via-copy callee-saved register into a virtual register in \p Entry
+/// and back into the physical register in \p Exit.
+void AArch64TargetLowering::handleSplitCSR(MachineBasicBlock *Entry,
+                                           MachineBasicBlock *Exit) const {
+  // Update IsSplitCSR in AArch64FunctionInfo.
+  AArch64FunctionInfo *AFI =
+      Entry->getParent()->getInfo<AArch64FunctionInfo>();
+  AFI->setIsSplitCSR(true);
+  if (!Exit)
+    return;
+
+  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
+  const MCPhysReg *I = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
+  if (!I)
+    return;
+
+  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
+  for (; *I; ++I) {
+    const TargetRegisterClass *RC = nullptr;
+    if (AArch64::GPR64RegClass.contains(*I))
+      RC = &AArch64::GPR64RegClass;
+    else if (AArch64::FPR64RegClass.contains(*I))
+      RC = &AArch64::FPR64RegClass;
+    else
+      llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+    unsigned NewVR = MRI->createVirtualRegister(RC);
+    // Save the CSR in a virtual register at the start of the entry block...
+    Entry->addLiveIn(*I);
+    BuildMI(*Entry, Entry->begin(), DebugLoc(),
+            TII->get(TargetOpcode::COPY), NewVR)
+        .addReg(*I);
+    // ...and restore it from that register at the start of the exit block.
+    BuildMI(*Exit, Exit->begin(), DebugLoc(),
+            TII->get(TargetOpcode::COPY), *I)
+        .addReg(NewVR);
+  }
+}
Index: lib/Target/AArch64/AArch64MachineFunctionInfo.h
===================================================================
--- lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -72,16 +72,22 @@
   /// registers.
   unsigned VarArgsFPRSize;
 
+  /// True when part of the callee-saved register set is preserved via
+  /// explicit copies instead of by the prologue and epilogue.
+  bool IsSplitCSR;
+
 public:
   AArch64FunctionInfo()
       : BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false),
         NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0),
-        VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0) {}
+        VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0),
+        IsSplitCSR(false) {}
 
   explicit AArch64FunctionInfo(MachineFunction &MF)
       : BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false),
         NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0),
-        VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0) {
+        VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0),
+        IsSplitCSR(false) {
     (void)MF;
   }
 
@@ -96,6 +102,9 @@
   bool hasStackFrame() const { return HasStackFrame; }
   void setHasStackFrame(bool s) { HasStackFrame = s; }
 
+  bool isSplitCSR() const { return IsSplitCSR; }
+  void setIsSplitCSR(bool s) { IsSplitCSR = s; }
+
   void setLocalStackSize(unsigned Size) { LocalStackSize = Size; }
   unsigned getLocalStackSize() const { return LocalStackSize; }
 
Index: lib/Target/AArch64/AArch64RegisterInfo.h
===================================================================
--- lib/Target/AArch64/AArch64RegisterInfo.h
+++ lib/Target/AArch64/AArch64RegisterInfo.h
@@ -35,6 +35,8 @@
 
   /// Code Generation virtual methods...
   const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+  const MCPhysReg *
+  getCalleeSavedRegsViaCopy(const MachineFunction *MF) const override;
   const uint32_t *getCallPreservedMask(const MachineFunction &MF,
                                        CallingConv::ID) const override;
 
Index: lib/Target/AArch64/AArch64RegisterInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -15,6 +15,7 @@
 #include "AArch64RegisterInfo.h"
 #include "AArch64FrameLowering.h"
 #include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
 #include "AArch64Subtarget.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
 #include "llvm/ADT/BitVector.h"
@@ -47,11 +48,24 @@
   if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg)
     return CSR_AArch64_AllRegs_SaveList;
   if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS)
-    return CSR_AArch64_CXX_TLS_Darwin_SaveList;
+    return MF->getInfo<AArch64FunctionInfo>()->isSplitCSR()
+               ? CSR_AArch64_CXX_TLS_Darwin_PE_SaveList
+               : CSR_AArch64_CXX_TLS_Darwin_SaveList;
   else
     return CSR_AArch64_AAPCS_SaveList;
 }
 
+/// Return the callee-saved registers preserved via explicit copies when the
+/// function uses CXX_FAST_TLS with split CSR; otherwise return nullptr.
+const MCPhysReg *AArch64RegisterInfo::getCalleeSavedRegsViaCopy(
+    const MachineFunction *MF) const {
+  assert(MF && "Invalid MachineFunction pointer.");
+  if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
+      MF->getInfo<AArch64FunctionInfo>()->isSplitCSR())
+    return CSR_AArch64_CXX_TLS_Darwin_ViaCopy_SaveList;
+  return nullptr;
+}
+
 const uint32_t *
 AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                           CallingConv::ID CC) const {
Index: test/CodeGen/AArch64/cxx-tlscc.ll
===================================================================
--- test/CodeGen/AArch64/cxx-tlscc.ll
+++ test/CodeGen/AArch64/cxx-tlscc.ll
@@ -28,50 +28,49 @@
 }
 
 ; CHECK-LABEL: _ZTW2sg
-; CHECK-DAG: stp d31, d30
-; CHECK-DAG: stp d29, d28
-; CHECK-DAG: stp d27, d26
-; CHECK-DAG: stp d25, d24
-; CHECK-DAG: stp d23, d22
-; CHECK-DAG: stp d21, d20
-; CHECK-DAG: stp d19, d18
-; CHECK-DAG: stp d17, d16
-; CHECK-DAG: stp d7, d6
-; CHECK-DAG: stp d5, d4
-; CHECK-DAG: stp d3, d2
-; CHECK-DAG: stp d1, d0
-; CHECK-DAG: stp x20, x19
-; CHECK-DAG: stp x14, x13
-; CHECK-DAG: stp x12, x11
-; CHECK-DAG: stp x10, x9
-; CHECK-DAG: stp x8, x7
-; CHECK-DAG: stp x6, x5
-; CHECK-DAG: stp x4, x3
-; CHECK-DAG: stp x2, x1
-; CHECK-DAG: stp x29, x30
+; CHECK-NOT: stp d31, d30
+; CHECK-NOT: stp d29, d28
+; CHECK-NOT: stp d27, d26
+; CHECK-NOT: stp d25, d24
+; CHECK-NOT: stp d23, d22
+; CHECK-NOT: stp d21, d20
+; CHECK-NOT: stp d19, d18
+; CHECK-NOT: stp d17, d16
+; CHECK-NOT: stp d7, d6
+; CHECK-NOT: stp d5, d4
+; CHECK-NOT: stp d3, d2
+; CHECK-NOT: stp d1, d0
+; CHECK-NOT: stp x20, x19
+; CHECK-NOT: stp x14, x13
+; CHECK-NOT: stp x12, x11
+; CHECK-NOT: stp x10, x9
+; CHECK-NOT: stp x8, x7
+; CHECK-NOT: stp x6, x5
+; CHECK-NOT: stp x4, x3
+; CHECK-NOT: stp x2, x1
 ; CHECK: blr
 ; CHECK: tbnz w{{.*}}, #0, [[BB_end:.?LBB0_[0-9]+]]
 ; CHECK: blr
 ; CHECK: tlv_atexit
 ; CHECK: [[BB_end]]:
 ; CHECK: blr
-; CHECK-DAG: ldp x2, x1
-; CHECK-DAG: ldp x4, x3
-; CHECK-DAG: ldp x6, x5
-; CHECK-DAG: ldp x8, x7
-; CHECK-DAG: ldp x10, x9
-; CHECK-DAG: ldp x12, x11
-; CHECK-DAG: ldp x14, x13
-; CHECK-DAG: ldp x20, x19
-; CHECK-DAG: ldp d1, d0
-; CHECK-DAG: ldp d3, d2
-; CHECK-DAG: ldp d5, d4
-; CHECK-DAG: ldp d7, d6
-; CHECK-DAG: ldp d17, d16
-; CHECK-DAG: ldp d19, d18
-; CHECK-DAG: ldp d21, d20
-; CHECK-DAG: ldp d23, d22
-; CHECK-DAG: ldp d25, d24
-; CHECK-DAG: ldp d27, d26
-; CHECK-DAG: ldp d29, d28
-; CHECK-DAG: ldp d31, d30
+; CHECK-NOT: ldp x2, x1
+; CHECK-NOT: ldp x4, x3
+; CHECK-NOT: ldp x6, x5
+; CHECK-NOT: ldp x8, x7
+; CHECK-NOT: ldp x10, x9
+; CHECK-NOT: ldp x12, x11
+; CHECK-NOT: ldp x14, x13
+; CHECK-NOT: ldp x20, x19
+; CHECK-NOT: ldp d1, d0
+; CHECK-NOT: ldp d3, d2
+; CHECK-NOT: ldp d5, d4
+; CHECK-NOT: ldp d7, d6
+; CHECK-NOT: ldp d17, d16
+; CHECK-NOT: ldp d19, d18
+; CHECK-NOT: ldp d21, d20
+; CHECK-NOT: ldp d23, d22
+; CHECK-NOT: ldp d25, d24
+; CHECK-NOT: ldp d27, d26
+; CHECK-NOT: ldp d29, d28
+; CHECK-NOT: ldp d31, d30