Index: lib/CodeGen/TargetFrameLoweringImpl.cpp
===================================================================
--- lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -63,12 +63,15 @@
   const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
   const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF);
 
+  // Resize before the early returns. Some backends expect that
+  // SavedRegs.size() == TRI.getNumRegs() after this call even if there are no
+  // saved registers.
+  SavedRegs.resize(TRI.getNumRegs());
+
   // Early exit if there are no callee saved registers.
   if (!CSRegs || CSRegs[0] == 0)
     return;
 
-  SavedRegs.resize(TRI.getNumRegs());
-
   // In Naked functions we aren't going to save any registers.
   if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
     return;
Index: lib/Target/PowerPC/PPCCallingConv.td
===================================================================
--- lib/Target/PowerPC/PPCCallingConv.td
+++ lib/Target/PowerPC/PPCCallingConv.td
@@ -243,12 +243,23 @@
                                         F27, F28, F29, F30, F31, CR2, CR3, CR4
                                    )>;
 
+// CSRs that are handled by prologue, epilogue.
+def CSR_SRV464_TLS_PE : CalleeSavedRegs<(add)>;
+
+def CSR_SVR464_ViaCopy : CalleeSavedRegs<(add CSR_SVR464)>;
+
 def CSR_SVR464_Altivec : CalleeSavedRegs<(add CSR_SVR464, CSR_Altivec)>;
 
+def CSR_SVR464_Altivec_ViaCopy : CalleeSavedRegs<(add CSR_SVR464_Altivec)>;
+
 def CSR_SVR464_R2 : CalleeSavedRegs<(add CSR_SVR464, X2)>;
 
+def CSR_SVR464_R2_ViaCopy : CalleeSavedRegs<(add CSR_SVR464_R2)>;
+
 def CSR_SVR464_R2_Altivec : CalleeSavedRegs<(add CSR_SVR464_Altivec, X2)>;
 
+def CSR_SVR464_R2_Altivec_ViaCopy : CalleeSavedRegs<(add CSR_SVR464_R2_Altivec)>;
+
 def CSR_NoRegs : CalleeSavedRegs<(add)>;
 
 def CSR_64_AllRegs: CalleeSavedRegs<(add X0, (sequence "X%u", 3, 10),
Index: lib/Target/PowerPC/PPCFastISel.cpp
===================================================================
--- lib/Target/PowerPC/PPCFastISel.cpp
+++ lib/Target/PowerPC/PPCFastISel.cpp
@@ -1583,6 +1583,9 @@
   if (!FuncInfo.CanLowerReturn)
     return false;
 
+  if (TLI.supportSplitCSR(FuncInfo.MF))
+    return false;
+
   const ReturnInst *Ret = cast<ReturnInst>(I);
   const Function &F = *I->getParent()->getParent();
 
Index: lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.h
+++ lib/Target/PowerPC/PPCISelLowering.h
@@ -442,6 +442,25 @@
       return true;
     }
 
+    /// Split CSR handling is supported only for nounwind functions that use
+    /// the CXX_FAST_TLS calling convention.
+    bool supportSplitCSR(MachineFunction *MF) const override {
+      return
+        MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
+        MF->getFunction()->hasFnAttribute(Attribute::NoUnwind);
+    }
+
+    /// Mark the function containing \p Entry as using split CSR handling.
+    /// This is a no-op on Darwin and on 32-bit targets.
+    void initializeSplitCSR(MachineBasicBlock *Entry) const override;
+
+    /// Copy each register returned by getCalleeSavedRegsViaCopy into a fresh
+    /// virtual register at the start of \p Entry, and copy it back right
+    /// before the first terminator of every block in \p Exits.
+    void insertCopiesSplitCSR(
+      MachineBasicBlock *Entry,
+      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
+
     /// getSetCCResultType - Return the ISD::SETCC ValueType
     EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                            EVT VT) const override;
Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -5818,6 +5818,25 @@
     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
   }
 
+  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
+  const MCPhysReg *I =
+    TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
+  if (I) {
+    for (; *I; ++I) {
+
+      if (PPC::G8RCRegClass.contains(*I))
+        RetOps.push_back(DAG.getRegister(*I, MVT::i64));
+      else if (PPC::F8RCRegClass.contains(*I))
+        RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
+      else if (PPC::CRRCRegClass.contains(*I))
+        RetOps.push_back(DAG.getRegister(*I, MVT::i1));
+      else if (PPC::VRRCRegClass.contains(*I))
+        RetOps.push_back(DAG.getRegister(*I, MVT::Other));
+      else
+        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+    }
+  }
+
   RetOps[0] = Chain;  // Update chain.
 
   // Add the flag if we have it.
@@ -11579,3 +11598,57 @@
                                   const TargetLibraryInfo *LibInfo) const {
   return PPC::createFastISel(FuncInfo, LibInfo);
 }
+
+void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
+  if (Subtarget.isDarwinABI()) return;
+  if (!Subtarget.isPPC64()) return;
+
+  // Update IsSplitCSR in PPCFunctionInfo
+  PPCFunctionInfo *PFI = Entry->getParent()->getInfo<PPCFunctionInfo>();
+  PFI->setIsSplitCSR(true);
+}
+
+void PPCTargetLowering::insertCopiesSplitCSR(
+  MachineBasicBlock *Entry,
+  const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
+  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
+  const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
+  if (!IStart)
+    return;
+
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
+  MachineBasicBlock::iterator MBBI = Entry->begin();
+  for (const MCPhysReg *I = IStart; *I; ++I) {
+    const TargetRegisterClass *RC = nullptr;
+    if (PPC::G8RCRegClass.contains(*I))
+      RC = &PPC::G8RCRegClass;
+    else if (PPC::F8RCRegClass.contains(*I))
+      RC = &PPC::F8RCRegClass;
+    else if (PPC::CRRCRegClass.contains(*I))
+      RC = &PPC::CRRCRegClass;
+    else if (PPC::VRRCRegClass.contains(*I))
+      RC = &PPC::VRRCRegClass;
+    else
+      llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+
+    unsigned NewVR = MRI->createVirtualRegister(RC);
+    // Create copy from CSR to a virtual register.
+    // FIXME: this currently does not emit CFI pseudo-instructions, it works
+    // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
+    // nounwind. If we want to generalize this later, we may need to emit
+    // CFI pseudo-instructions.
+    assert(Entry->getParent()->getFunction()->hasFnAttribute(
+             Attribute::NoUnwind) &&
+           "Function should be nounwind in insertCopiesSplitCSR!");
+    Entry->addLiveIn(*I);
+    BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
+      .addReg(*I);
+
+    // Insert the copy-back instructions right before the terminator
+    for (auto *Exit : Exits)
+      BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
+              TII->get(TargetOpcode::COPY), *I)
+        .addReg(NewVR);
+  }
+}
Index: lib/Target/PowerPC/PPCMachineFunctionInfo.h
===================================================================
--- lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -104,6 +104,10 @@
   /// Whether this uses the PIC Base register or not.
   bool UsesPICBase;
 
+  /// True if this function has a subset of CSRs that is handled explicitly via
+  /// copies
+  bool IsSplitCSR;
+
 public:
   explicit PPCFunctionInfo(MachineFunction &MF)
     : FramePointerSaveIndex(0),
@@ -125,7 +129,8 @@
       VarArgsNumFPR(0),
       CRSpillFrameIndex(0),
       MF(MF),
-      UsesPICBase(0) {}
+      UsesPICBase(0),
+      IsSplitCSR(false) {}
 
   int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
   void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
@@ -196,6 +201,9 @@
   void setUsesPICBase(bool uses) { UsesPICBase = uses; }
   bool usesPICBase() const { return UsesPICBase; }
 
+  bool isSplitCSR() const { return IsSplitCSR; }
+  void setIsSplitCSR(bool s) { IsSplitCSR = s; }
+
   MCSymbol *getPICOffsetSymbol() const;
 
   MCSymbol *getGlobalEPSymbol() const;
Index: lib/Target/PowerPC/PPCRegisterInfo.h
===================================================================
--- lib/Target/PowerPC/PPCRegisterInfo.h
+++ lib/Target/PowerPC/PPCRegisterInfo.h
@@ -75,6 +75,8 @@
 
   /// Code Generation virtual methods...
   const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+  const MCPhysReg *
+  getCalleeSavedRegsViaCopy(const MachineFunction *MF) const override;
   const uint32_t *getCallPreservedMask(const MachineFunction &MF,
                                        CallingConv::ID CC) const override;
   const uint32_t *getNoPreservedMask() const override;
Index: lib/Target/PowerPC/PPCRegisterInfo.cpp
===================================================================
--- lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -116,6 +116,9 @@
                : (Subtarget.hasAltivec() ? CSR_Darwin32_Altivec_SaveList
                                          : CSR_Darwin32_SaveList);
 
+  if (TM.isPPC64() && MF->getInfo<PPCFunctionInfo>()->isSplitCSR())
+    return CSR_SRV464_TLS_PE_SaveList;
+
   // On PPC64, we might need to save r2 (but only if it is not reserved).
   bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2);
 
@@ -128,6 +131,31 @@
                          : CSR_SVR432_SaveList);
 }
 
+const MCPhysReg *
+PPCRegisterInfo::getCalleeSavedRegsViaCopy(const MachineFunction *MF) const {
+  assert(MF && "Invalid MachineFunction pointer.");
+  const PPCSubtarget &Subtarget = MF->getSubtarget<PPCSubtarget>();
+  if (Subtarget.isDarwinABI())
+    return nullptr;
+  if (!TM.isPPC64())
+    return nullptr;
+  if (MF->getFunction()->getCallingConv() != CallingConv::CXX_FAST_TLS)
+    return nullptr;
+  if (!MF->getInfo<PPCFunctionInfo>()->isSplitCSR())
+    return nullptr;
+
+  // On PPC64, we might need to save r2 (but only if it is not reserved).
+  bool SaveR2 = !getReservedRegs(*MF).test(PPC::X2);
+  if (Subtarget.hasAltivec())
+    return SaveR2
+      ? CSR_SVR464_R2_Altivec_ViaCopy_SaveList
+      : CSR_SVR464_Altivec_ViaCopy_SaveList;
+  else
+    return SaveR2
+      ? CSR_SVR464_R2_ViaCopy_SaveList
+      : CSR_SVR464_ViaCopy_SaveList;
+}
+
 const uint32_t *
 PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                       CallingConv::ID CC) const {
Index: test/CodeGen/PowerPC/cxx_tlscc64.ll
===================================================================
--- test/CodeGen/PowerPC/cxx_tlscc64.ll
+++ test/CodeGen/PowerPC/cxx_tlscc64.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s --enable-shrink-wrap=false -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s
+%struct.S = type { i8 }
+
+@sg = internal thread_local global %struct.S zeroinitializer, align 1
+@__dso_handle = external global i8
+@__tls_guard = internal thread_local unnamed_addr global i1 false
+@sum1 = internal thread_local global i32 0, align 4
+
+declare void @_ZN1SC1Ev(%struct.S*)
+declare void @_ZN1SD1Ev(%struct.S*)
+declare i32 @_tlv_atexit(void (i8*)*, i8*, i8*)
+
+; CHECK-LABEL: _ZTW2sg
+define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() nounwind {
+  %.b.i = load i1, i1* @__tls_guard, align 1
+; CHECK: bc 12, 1, [[BB_end:.?LBB0_[0-9]+]]
+  br i1 %.b.i, label %__tls_init.exit, label %init.i
+
+init.i:
+; CHECK: Folded Spill
+  store i1 true, i1* @__tls_guard, align 1
+  tail call void @_ZN1SC1Ev(%struct.S* nonnull @sg) #2
+; CHECK: bl _ZN1SC1Ev
+  %1 = tail call i32 @_tlv_atexit(void (i8*)* nonnull bitcast (void (%struct.S*)* @_ZN1SD1Ev to void (i8*)*), i8* nonnull getelementptr inbounds (%struct.S, %struct.S* @sg, i64 0, i32 0), i8* nonnull @__dso_handle) #2
+; CHECK: Folded Reload
+; CHECK: _tlv_atexit
+  br label %__tls_init.exit
+
+; CHECK: [[BB_end]]:
+__tls_init.exit:
+  ret %struct.S* @sg
+}
+
+; CHECK-LABEL: _ZTW4sum1
+define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind {
+  ret i32* @sum1
+}
+
+define cxx_fast_tlscc i32* @_ZTW4sum2() #0 {
+  ret i32* @sum1
+}
+
+attributes #0 = { nounwind "no-frame-pointer-elim"="true" }
\ No newline at end of file