Index: include/llvm/CodeGen/FunctionLoweringInfo.h =================================================================== --- include/llvm/CodeGen/FunctionLoweringInfo.h +++ include/llvm/CodeGen/FunctionLoweringInfo.h @@ -62,9 +62,6 @@ /// registers. bool CanLowerReturn; - /// True if part of the CSRs will be handled via explicit copies. - bool SplitCSR; - /// DemoteRegister - if CanLowerReturn is false, DemoteRegister is a vreg /// allocated to hold a pointer to the hidden sret parameter. unsigned DemoteRegister; Index: include/llvm/Target/TargetLowering.h =================================================================== --- include/llvm/Target/TargetLowering.h +++ include/llvm/Target/TargetLowering.h @@ -1218,6 +1218,10 @@ Action != TypeSplitVector; } + /// Returns true if the given function only ends in return (or unreachable) + /// instructions. + static bool mayUseSplitCSR(const MachineFunction &MF); + //===--------------------------------------------------------------------===// // TargetLowering Configuration Methods - These methods should be invoked by // the derived class constructor to configure this object for the target. @@ -2307,35 +2311,12 @@ return false; } - /// Return true if the target supports that a subset of CSRs for the given - /// machine function is handled explicitly via copies. - virtual bool supportSplitCSR(MachineFunction *MF) const { - return false; - } - /// Return true if the MachineFunction contains a COPY which would imply /// HasCopyImplyingStackAdjustment. virtual bool hasCopyImplyingStackAdjustment(MachineFunction *MF) const { return false; } - /// Perform necessary initialization to handle a subset of CSRs explicitly - /// via copies. This function is called at the beginning of instruction - /// selection. - virtual void initializeSplitCSR(MachineBasicBlock *Entry) const { - llvm_unreachable("Not Implemented"); - } - - /// Insert explicit copies in entry and exit blocks. 
We copy a subset of - /// CSRs to virtual registers in the entry block, and copy them back to - /// physical registers in the exit blocks. This function is called at the end - /// of instruction selection. - virtual void insertCopiesSplitCSR( - MachineBasicBlock *Entry, - const SmallVectorImpl &Exits) const { - llvm_unreachable("Not Implemented"); - } - //===--------------------------------------------------------------------===// // Lowering methods - These methods must be implemented by targets so that // the SelectionDAGBuilder code knows how to lower these. @@ -2680,6 +2661,18 @@ bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const; + /// Copy callee saved registers that are saved in vregs back to physregs. + /// Takes a zero-terminated list of registers that have been added with + /// MachineFunction::addLiveIn(). Creates a sequence of + /// CopyFromReg/CopyToReg nodes and appends physreg operands to the given + /// operand list. Returns the glue value of the last CopyToReg. + /// Note: Typically used to add operands to the target return instruction when + /// some callee saved registers are kept in vregs because they are also used + /// for function parameters. + SDValue addCalleeSaveRegOps(const MCPhysReg *Regs, SDLoc DL, SDValue Chain, + SDValue Glue, SmallVectorImpl &Ops, + SelectionDAG &DAG) const; + //===--------------------------------------------------------------------===// // Inline Asm Support hooks // Index: include/llvm/Target/TargetRegisterInfo.h =================================================================== --- include/llvm/Target/TargetRegisterInfo.h +++ include/llvm/Target/TargetRegisterInfo.h @@ -439,11 +439,6 @@ virtual const MCPhysReg* getCalleeSavedRegs(const MachineFunction *MF) const = 0; - virtual const MCPhysReg* - getCalleeSavedRegsViaCopy(const MachineFunction *MF) const { - return nullptr; - } - /// Return a mask of call-preserved registers for the given calling convention /// on the current function. 
The mask should include all call-preserved /// aliases. This is used by the register allocator to determine which Index: lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -471,58 +471,15 @@ MF->setHasInlineAsm(false); - FuncInfo->SplitCSR = false; - - // We split CSR if the target supports it for the given function - // and the function has only return exits. - if (OptLevel != CodeGenOpt::None && TLI->supportSplitCSR(MF)) { - FuncInfo->SplitCSR = true; - - // Collect all the return blocks. - for (const BasicBlock &BB : Fn) { - if (!succ_empty(&BB)) - continue; - - const TerminatorInst *Term = BB.getTerminator(); - if (isa(Term) || isa(Term)) - continue; - - // Bail out if the exit block is not Return nor Unreachable. - FuncInfo->SplitCSR = false; - break; - } - } - - MachineBasicBlock *EntryMBB = &MF->front(); - if (FuncInfo->SplitCSR) - // This performs initialization so lowering for SplitCSR will be correct. - TLI->initializeSplitCSR(EntryMBB); - SelectAllBasicBlocks(Fn); // If the first basic block in the function has live ins that need to be // copied into vregs, emit the copies into the top of the block before // emitting the code for the block. const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo(); + MachineBasicBlock *EntryMBB = &MF->front(); RegInfo->EmitLiveInCopies(EntryMBB, TRI, *TII); - // Insert copies in the entry block and the return blocks. - if (FuncInfo->SplitCSR) { - SmallVector Returns; - // Collect all the return blocks. 
- for (MachineBasicBlock &MBB : mf) { - if (!MBB.succ_empty()) - continue; - - MachineBasicBlock::iterator Term = MBB.getFirstTerminator(); - if (Term != MBB.end() && Term->isReturn()) { - Returns.push_back(&MBB); - continue; - } - } - TLI->insertCopiesSplitCSR(EntryMBB, Returns); - } - DenseMap LiveInMap; if (!FuncInfo->ArgDbgValues.empty()) for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(), Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -2938,6 +2939,26 @@ return false; } +SDValue TargetLowering::addCalleeSaveRegOps(const MCPhysReg *Regs, SDLoc DL, + SDValue Chain, SDValue Glue, SmallVectorImpl &Ops, + SelectionDAG &DAG) const { + const MachineFunction &MF = DAG.getMachineFunction(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + for (const MCPhysReg *I = Regs; *I; ++I) { + const MCPhysReg PReg = *I; + unsigned VReg = MRI.getLiveInVirtReg(PReg); + + const TargetRegisterClass &RC = *MRI.getRegClass(VReg); + MVT RegVT = *RC.vt_begin(); + SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, RegVT); + SDValue CopyToReg = DAG.getCopyToReg(Chain, DL, PReg, Val, Glue); + Glue = CopyToReg.getValue(1); + + Ops.push_back(DAG.getRegister(PReg, RegVT)); + } + return Glue; +} + //===----------------------------------------------------------------------===// // Legalization Utilities //===----------------------------------------------------------------------===// Index: lib/CodeGen/TargetLoweringBase.cpp =================================================================== --- 
lib/CodeGen/TargetLoweringBase.cpp +++ lib/CodeGen/TargetLoweringBase.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/StackMaps.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" @@ -1809,6 +1810,18 @@ return true; } +bool TargetLoweringBase::mayUseSplitCSR(const MachineFunction &MF) { + for (const BasicBlock &BB : *MF.getFunction()) { + if (!succ_empty(&BB)) + continue; + + const TerminatorInst *Term = BB.getTerminator(); + if (!isa(Term) && !isa(Term)) + return false; + } + return true; +} + //===----------------------------------------------------------------------===// // Stack Protector //===----------------------------------------------------------------------===// Index: lib/Target/AArch64/AArch64FastISel.cpp =================================================================== --- lib/Target/AArch64/AArch64FastISel.cpp +++ lib/Target/AArch64/AArch64FastISel.cpp @@ -3679,9 +3679,6 @@ F.getAttributes().hasAttrSomewhere(Attribute::SwiftError)) return false; - if (TLI.supportSplitCSR(FuncInfo.MF)) - return false; - // Build a list of return value registers. 
SmallVector RetRegs; Index: lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.h +++ lib/Target/AArch64/AArch64ISelLowering.h @@ -391,14 +391,6 @@ bool isCheapToSpeculateCtlz() const override { return true; } - bool supportSplitCSR(MachineFunction *MF) const override { - return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS && - MF->getFunction()->hasFnAttribute(Attribute::NoUnwind); - } - void initializeSplitCSR(MachineBasicBlock *Entry) const override; - void insertCopiesSplitCSR( - MachineBasicBlock *Entry, - const SmallVectorImpl &Exits) const override; bool supportSwiftError() const override { return true; Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2672,6 +2672,28 @@ // much is there while considering tail calls (because we can reuse it). FuncInfo->setBytesInStackArgArea(StackArgSize); + // Should we save callee save registers via vregs? + if (CallConv == CallingConv::CXX_FAST_TLS && + MF.getFunction()->hasFnAttribute(Attribute::NoUnwind) && + !MF.getTarget().Options.EnableFastISel && mayUseSplitCSR(MF)) { + const MCPhysReg *SavedViaCopy = + AArch64RegisterInfo::getFastTLSSavedViaCopy(); + FuncInfo->setCSRSavedViaCopy(SavedViaCopy); + + // Add live-ins. + for (const MCPhysReg *I = SavedViaCopy; *I; ++I) { + const MCPhysReg Reg = *I; + const TargetRegisterClass *RC; + if (AArch64::GPR64RegClass.contains(Reg)) + RC = &AArch64::GPR64RegClass; + else if (AArch64::FPR64RegClass.contains(Reg)) + RC = &AArch64::FPR64RegClass; + else + llvm_unreachable("Unexpected register class in CSRsViaCopy!"); + MF.addLiveIn(Reg, RC); + } + } + return Chain; } @@ -3348,11 +3370,12 @@ ? 
RetCC_AArch64_WebKit_JS : RetCC_AArch64_AAPCS; SmallVector RVLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, - *DAG.getContext()); + MachineFunction &MF = DAG.getMachineFunction(); + CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext()); CCInfo.AnalyzeReturn(Outs, RetCC); // Copy the result values into the output registers. + SDValue IncomingChain = Chain; SDValue Flag; SmallVector RetOps(1, Chain); for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size(); @@ -3382,19 +3405,11 @@ Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } - const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo(); - const MCPhysReg *I = - TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction()); - if (I) { - for (; *I; ++I) { - if (AArch64::GPR64RegClass.contains(*I)) - RetOps.push_back(DAG.getRegister(*I, MVT::i64)); - else if (AArch64::FPR64RegClass.contains(*I)) - RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64))); - else - llvm_unreachable("Unexpected register class in CSRsViaCopy!"); - } - } + const AArch64FunctionInfo &FuncInfo = *MF.getInfo(); + const MCPhysReg *SavedViaCopy = FuncInfo.getCSRSavedViaCopy(); + if (SavedViaCopy != nullptr) + Flag = addCalleeSaveRegOps(SavedViaCopy, DL, IncomingChain, Flag, RetOps, + DAG); RetOps[0] = Chain; // Update chain. @@ -10264,53 +10279,6 @@ Type::getInt8PtrTy(IRB.getContext())->getPointerTo(0)); } -void AArch64TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const { - // Update IsSplitCSR in AArch64unctionInfo. 
- AArch64FunctionInfo *AFI = Entry->getParent()->getInfo(); - AFI->setIsSplitCSR(true); -} - -void AArch64TargetLowering::insertCopiesSplitCSR( - MachineBasicBlock *Entry, - const SmallVectorImpl &Exits) const { - const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo(); - const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent()); - if (!IStart) - return; - - const TargetInstrInfo *TII = Subtarget->getInstrInfo(); - MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo(); - MachineBasicBlock::iterator MBBI = Entry->begin(); - for (const MCPhysReg *I = IStart; *I; ++I) { - const TargetRegisterClass *RC = nullptr; - if (AArch64::GPR64RegClass.contains(*I)) - RC = &AArch64::GPR64RegClass; - else if (AArch64::FPR64RegClass.contains(*I)) - RC = &AArch64::FPR64RegClass; - else - llvm_unreachable("Unexpected register class in CSRsViaCopy!"); - - unsigned NewVR = MRI->createVirtualRegister(RC); - // Create copy from CSR to a virtual register. - // FIXME: this currently does not emit CFI pseudo-instructions, it works - // fine for CXX_FAST_TLS since the C++-style TLS access functions should be - // nounwind. If we want to generalize this later, we may need to emit - // CFI pseudo-instructions. - assert(Entry->getParent()->getFunction()->hasFnAttribute( - Attribute::NoUnwind) && - "Function should be nounwind in insertCopiesSplitCSR!"); - Entry->addLiveIn(*I); - BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR) - .addReg(*I); - - // Insert the copy-back instructions right before the terminator. - for (auto *Exit : Exits) - BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(), - TII->get(TargetOpcode::COPY), *I) - .addReg(NewVR); - } -} - bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeSet Attr) const { // Integer division on AArch64 is expensive. 
However, when aggressively // optimizing for code size, we prefer to use a div instruction, as it is Index: lib/Target/AArch64/AArch64MachineFunctionInfo.h =================================================================== --- lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -75,9 +75,8 @@ /// registers. unsigned VarArgsFPRSize; - /// True if this function has a subset of CSRs that is handled explicitly via - /// copies. - bool IsSplitCSR; + /// zero-terminated list of callee saved registers saved by copying to a vreg. + const MCPhysReg *CSRSavedViaCopy = nullptr; /// True when the stack gets realigned dynamically because the size of stack /// frame is unknown at compile time. e.g., in case of VLAs. @@ -88,13 +87,13 @@ : BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false), NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0), VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0), - IsSplitCSR(false), StackRealigned(false) {} + StackRealigned(false) {} explicit AArch64FunctionInfo(MachineFunction &MF) : BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false), NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0), VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0), - IsSplitCSR(false), StackRealigned(false) { + StackRealigned(false) { (void)MF; } @@ -112,8 +111,10 @@ bool isStackRealigned() const { return StackRealigned; } void setStackRealigned(bool s) { StackRealigned = s; } - bool isSplitCSR() const { return IsSplitCSR; } - void setIsSplitCSR(bool s) { IsSplitCSR = s; } + const MCPhysReg *getCSRSavedViaCopy() const { return CSRSavedViaCopy; } + void setCSRSavedViaCopy(const MCPhysReg *CSRSavedViaCopy) { + this->CSRSavedViaCopy = CSRSavedViaCopy; + } void setLocalStackSize(unsigned Size) { LocalStackSize = Size; } unsigned getLocalStackSize() const { return LocalStackSize; } Index: lib/Target/AArch64/AArch64RegisterInfo.h 
=================================================================== --- lib/Target/AArch64/AArch64RegisterInfo.h +++ lib/Target/AArch64/AArch64RegisterInfo.h @@ -35,8 +35,6 @@ /// Code Generation virtual methods... const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; - const MCPhysReg * - getCalleeSavedRegsViaCopy(const MachineFunction *MF) const override; const uint32_t *getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override; @@ -95,6 +93,9 @@ unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override; + + /// Returns CSR_AArch64_CXX_TLS_Darwin_ViaCopy_SaveList. + static const MCPhysReg *getFastTLSSavedViaCopy(); }; } // end namespace llvm Index: lib/Target/AArch64/AArch64RegisterInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64RegisterInfo.cpp +++ lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -48,7 +48,7 @@ if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg) return CSR_AArch64_AllRegs_SaveList; if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS) - return MF->getInfo()->isSplitCSR() ? + return MF->getInfo()->getCSRSavedViaCopy() ? 
CSR_AArch64_CXX_TLS_Darwin_PE_SaveList : CSR_AArch64_CXX_TLS_Darwin_SaveList; if (MF->getSubtarget().getTargetLowering() @@ -62,13 +62,8 @@ return CSR_AArch64_AAPCS_SaveList; } -const MCPhysReg *AArch64RegisterInfo::getCalleeSavedRegsViaCopy( - const MachineFunction *MF) const { - assert(MF && "Invalid MachineFunction pointer."); - if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS && - MF->getInfo()->isSplitCSR()) - return CSR_AArch64_CXX_TLS_Darwin_ViaCopy_SaveList; - return nullptr; +const MCPhysReg *AArch64RegisterInfo::getFastTLSSavedViaCopy() { + return CSR_AArch64_CXX_TLS_Darwin_ViaCopy_SaveList; } const uint32_t * Index: lib/Target/ARM/ARMBaseRegisterInfo.h =================================================================== --- lib/Target/ARM/ARMBaseRegisterInfo.h +++ lib/Target/ARM/ARMBaseRegisterInfo.h @@ -98,8 +98,6 @@ public: /// Code Generation virtual methods... const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; - const MCPhysReg * - getCalleeSavedRegsViaCopy(const MachineFunction *MF) const override; const uint32_t *getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override; const uint32_t *getNoPreservedMask() const override; @@ -193,6 +191,9 @@ const TargetRegisterClass *DstRC, unsigned DstSubReg, const TargetRegisterClass *NewRC) const override; + + /// Returns CSR_iOS_CXX_TLS_ViaCopy_SaveList. + static const MCPhysReg *getFastTLSSavedViaCopy(); }; } // end namespace llvm Index: lib/Target/ARM/ARMBaseRegisterInfo.cpp =================================================================== --- lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -89,21 +89,12 @@ return CSR_iOS_SwiftError_SaveList; if (STI.isTargetDarwin() && F->getCallingConv() == CallingConv::CXX_FAST_TLS) - return MF->getInfo()->isSplitCSR() + return MF->getInfo()->getCSRSavedViaCopy() ? 
CSR_iOS_CXX_TLS_PE_SaveList : CSR_iOS_CXX_TLS_SaveList; return RegList; } -const MCPhysReg *ARMBaseRegisterInfo::getCalleeSavedRegsViaCopy( - const MachineFunction *MF) const { - assert(MF && "Invalid MachineFunction pointer."); - if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS && - MF->getInfo()->isSplitCSR()) - return CSR_iOS_CXX_TLS_ViaCopy_SaveList; - return nullptr; -} - const uint32_t * ARMBaseRegisterInfo::getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { @@ -836,3 +827,7 @@ } return false; } + +const MCPhysReg *ARMBaseRegisterInfo::getFastTLSSavedViaCopy() { + return CSR_iOS_CXX_TLS_ViaCopy_SaveList; +} Index: lib/Target/ARM/ARMFastISel.cpp =================================================================== --- lib/Target/ARM/ARMFastISel.cpp +++ lib/Target/ARM/ARMFastISel.cpp @@ -2119,9 +2119,6 @@ F.getAttributes().hasAttrSomewhere(Attribute::SwiftError)) return false; - if (TLI.supportSplitCSR(FuncInfo.MF)) - return false; - // Build a list of return value registers. 
SmallVector RetRegs; Index: lib/Target/ARM/ARMISelLowering.h =================================================================== --- lib/Target/ARM/ARMISelLowering.h +++ lib/Target/ARM/ARMISelLowering.h @@ -591,15 +591,6 @@ SmallVectorImpl &InVals, bool isThisReturn, SDValue ThisVal) const; - bool supportSplitCSR(MachineFunction *MF) const override { - return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS && - MF->getFunction()->hasFnAttribute(Attribute::NoUnwind); - } - void initializeSplitCSR(MachineBasicBlock *Entry) const override; - void insertCopiesSplitCSR( - MachineBasicBlock *Entry, - const SmallVectorImpl &Exits) const override; - SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -2273,6 +2273,7 @@ CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true, isVarArg)); + SDValue IncomingChain = Chain; SDValue Flag; SmallVector RetOps; RetOps.push_back(Chain); // Operand #0 = Chain (updated below) @@ -2345,19 +2346,10 @@ Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } - const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo(); - const MCPhysReg *I = - TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction()); - if (I) { - for (; *I; ++I) { - if (ARM::GPRRegClass.contains(*I)) - RetOps.push_back(DAG.getRegister(*I, MVT::i32)); - else if (ARM::DPRRegClass.contains(*I)) - RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64))); - else - llvm_unreachable("Unexpected register class in CSRsViaCopy!"); - } - } + const MCPhysReg *SavedViaCopy = AFI->getCSRSavedViaCopy(); + if (SavedViaCopy != nullptr) + Flag = addCalleeSaveRegOps(SavedViaCopy, dl, IncomingChain, Flag, RetOps, + DAG); // Update chain and glue. 
RetOps[0] = Chain; @@ -3389,6 +3381,27 @@ AFI->setArgumentStackSize(CCInfo.getNextStackOffset()); + // Should we save callee save registers via vregs? + if (CallConv == CallingConv::CXX_FAST_TLS && + MF.getFunction()->hasFnAttribute(Attribute::NoUnwind) && + !MF.getTarget().Options.EnableFastISel && mayUseSplitCSR(MF)) { + const MCPhysReg *SavedViaCopy = + ARMBaseRegisterInfo::getFastTLSSavedViaCopy(); + AFI->setCSRSavedViaCopy(SavedViaCopy); + + for (const MCPhysReg *I = SavedViaCopy; *I; ++I) { + const MCPhysReg Reg = *I; + const TargetRegisterClass *RC; + if (ARM::GPRRegClass.contains(Reg)) + RC = &ARM::GPRRegClass; + else if (ARM::DPRRegClass.contains(Reg)) + RC = &ARM::DPRRegClass; + else + llvm_unreachable("Unexpected register class in CSRsViaCopy!"); + MF.addLiveIn(Reg, RC); + } + } + return Chain; } @@ -12530,50 +12543,3 @@ // via the personality function. return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R1; } - -void ARMTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const { - // Update IsSplitCSR in ARMFunctionInfo. 
- ARMFunctionInfo *AFI = Entry->getParent()->getInfo(); - AFI->setIsSplitCSR(true); -} - -void ARMTargetLowering::insertCopiesSplitCSR( - MachineBasicBlock *Entry, - const SmallVectorImpl &Exits) const { - const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo(); - const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent()); - if (!IStart) - return; - - const TargetInstrInfo *TII = Subtarget->getInstrInfo(); - MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo(); - MachineBasicBlock::iterator MBBI = Entry->begin(); - for (const MCPhysReg *I = IStart; *I; ++I) { - const TargetRegisterClass *RC = nullptr; - if (ARM::GPRRegClass.contains(*I)) - RC = &ARM::GPRRegClass; - else if (ARM::DPRRegClass.contains(*I)) - RC = &ARM::DPRRegClass; - else - llvm_unreachable("Unexpected register class in CSRsViaCopy!"); - - unsigned NewVR = MRI->createVirtualRegister(RC); - // Create copy from CSR to a virtual register. - // FIXME: this currently does not emit CFI pseudo-instructions, it works - // fine for CXX_FAST_TLS since the C++-style TLS access functions should be - // nounwind. If we want to generalize this later, we may need to emit - // CFI pseudo-instructions. - assert(Entry->getParent()->getFunction()->hasFnAttribute( - Attribute::NoUnwind) && - "Function should be nounwind in insertCopiesSplitCSR!"); - Entry->addLiveIn(*I); - BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR) - .addReg(*I); - - // Insert the copy-back instructions right before the terminator. - for (auto *Exit : Exits) - BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(), - TII->get(TargetOpcode::COPY), *I) - .addReg(NewVR); - } -} Index: lib/Target/ARM/ARMMachineFunctionInfo.h =================================================================== --- lib/Target/ARM/ARMMachineFunctionInfo.h +++ lib/Target/ARM/ARMMachineFunctionInfo.h @@ -118,9 +118,8 @@ /// coalesced weights. 
DenseMap CoalescedWeights; - /// True if this function has a subset of CSRs that is handled explicitly via - /// copies. - bool IsSplitCSR; + /// zero-terminated list of callee saved registers saved by copying to a vreg. + const MCPhysReg *CSRSavedViaCopy = nullptr; public: ARMFunctionInfo() : @@ -132,7 +131,7 @@ FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSAlignGapSize(0), DPRCSSize(0), NumAlignedDPRCS2Regs(0), PICLabelUId(0), - VarArgsFrameIndex(0), HasITBlocks(false), IsSplitCSR(false) {} + VarArgsFrameIndex(0), HasITBlocks(false) {} explicit ARMFunctionInfo(MachineFunction &MF); @@ -203,8 +202,10 @@ bool hasITBlocks() const { return HasITBlocks; } void setHasITBlocks(bool h) { HasITBlocks = h; } - bool isSplitCSR() const { return IsSplitCSR; } - void setIsSplitCSR(bool s) { IsSplitCSR = s; } + const MCPhysReg *getCSRSavedViaCopy() const { return CSRSavedViaCopy; } + void setCSRSavedViaCopy(const MCPhysReg *CSRSavedViaCopy) { + this->CSRSavedViaCopy = CSRSavedViaCopy; + } void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) { if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second) Index: lib/Target/ARM/ARMMachineFunctionInfo.cpp =================================================================== --- lib/Target/ARM/ARMMachineFunctionInfo.cpp +++ lib/Target/ARM/ARMMachineFunctionInfo.cpp @@ -20,5 +20,4 @@ RestoreSPFromFP(false), LRSpilledForFarJump(false), FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), - PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false), - IsSplitCSR(false) {} + PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false) {} Index: lib/Target/X86/X86FastISel.cpp =================================================================== --- lib/Target/X86/X86FastISel.cpp +++ lib/Target/X86/X86FastISel.cpp @@ -1021,9 +1021,6 @@ F.getAttributes().hasAttrSomewhere(Attribute::SwiftError)) return false; - if 
(TLI.supportSplitCSR(FuncInfo.MF)) - return false; - CallingConv::ID CC = F.getCallingConv(); if (CC != CallingConv::C && CC != CallingConv::Fast && Index: lib/Target/X86/X86ISelLowering.h =================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -1116,15 +1116,6 @@ const SmallVectorImpl &OutVals, SDLoc dl, SelectionDAG &DAG) const override; - bool supportSplitCSR(MachineFunction *MF) const override { - return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS && - MF->getFunction()->hasFnAttribute(Attribute::NoUnwind); - } - void initializeSplitCSR(MachineBasicBlock *Entry) const override; - void insertCopiesSplitCSR( - MachineBasicBlock *Entry, - const SmallVectorImpl &Exits) const override; - bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; bool mayBeEmittedAsTailCall(CallInst *CI) const override; Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -2138,6 +2138,7 @@ CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext()); CCInfo.AnalyzeReturn(Outs, RetCC_X86); + SDValue IncomingChain = Chain; SDValue Flag; SmallVector RetOps; RetOps.push_back(Chain); // Operand #0 = Chain (updated below) @@ -2228,8 +2229,8 @@ // either case FuncInfo->setSRetReturnReg() will have been called. if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) { // When we have both sret and another return value, we should use the - // original Chain stored in RetOps[0], instead of the current Chain updated - // in the above loop. If we only have sret, RetOps[0] equals to Chain. + // original Chain, instead of the current Chain updated in the above loop. + // If we only have sret, RetOps[0] equals to Chain. 
// For the case of sret and another return value, we have // Chain_0 at the function entry @@ -2246,7 +2247,7 @@ // Chain dependency from Unit A to Unit B // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg. - SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg, + SDValue Val = DAG.getCopyFromReg(IncomingChain, dl, SRetReg, getPointerTy(MF.getDataLayout())); unsigned RetValReg @@ -2260,17 +2261,12 @@ DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout()))); } - const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); - const MCPhysReg *I = - TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction()); - if (I) { - for (; *I; ++I) { - if (X86::GR64RegClass.contains(*I)) - RetOps.push_back(DAG.getRegister(*I, MVT::i64)); - else - llvm_unreachable("Unexpected register class in CSRsViaCopy!"); - } - } + // Ensure callee saved registers that are saved via copies are back in their + // registers. + const MCPhysReg *SavedViaCopy = FuncInfo->getCSRSavedViaCopy(); + if (SavedViaCopy != nullptr) + Flag = addCalleeSaveRegOps(SavedViaCopy, dl, IncomingChain, Flag, RetOps, + DAG); RetOps[0] = Chain; // Update chain. @@ -2953,6 +2949,25 @@ } } + // Should we save callee save registers via vregs? + if (CallConv == CallingConv::CXX_FAST_TLS && Is64Bit && + MF.getFunction()->hasFnAttribute(Attribute::NoUnwind) && + !MF.getTarget().Options.EnableFastISel && mayUseSplitCSR(MF)) { + const MCPhysReg *SavedViaCopy = X86RegisterInfo::getFastTLSSavedViaCopy(); + FuncInfo->setCSRSavedViaCopy(SavedViaCopy); + + // Add live-ins. 
+ for (const MCPhysReg *I = SavedViaCopy; *I; ++I) { + const MCPhysReg Reg = *I; + const TargetRegisterClass *RC; + if (X86::GR64RegClass.contains(Reg)) + RC = &X86::GR64RegClass; + else + llvm_unreachable("Unexpected register class in CSRsViaCopy!"); + MF.addLiveIn(Reg, RC); + } + } + return Chain; } @@ -30549,52 +30564,3 @@ Attribute::MinSize); return OptSize && !VT.isVector(); } - -void X86TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const { - if (!Subtarget.is64Bit()) - return; - - // Update IsSplitCSR in X86MachineFunctionInfo. - X86MachineFunctionInfo *AFI = - Entry->getParent()->getInfo(); - AFI->setIsSplitCSR(true); -} - -void X86TargetLowering::insertCopiesSplitCSR( - MachineBasicBlock *Entry, - const SmallVectorImpl &Exits) const { - const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); - const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent()); - if (!IStart) - return; - - const TargetInstrInfo *TII = Subtarget.getInstrInfo(); - MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo(); - MachineBasicBlock::iterator MBBI = Entry->begin(); - for (const MCPhysReg *I = IStart; *I; ++I) { - const TargetRegisterClass *RC = nullptr; - if (X86::GR64RegClass.contains(*I)) - RC = &X86::GR64RegClass; - else - llvm_unreachable("Unexpected register class in CSRsViaCopy!"); - - unsigned NewVR = MRI->createVirtualRegister(RC); - // Create copy from CSR to a virtual register. - // FIXME: this currently does not emit CFI pseudo-instructions, it works - // fine for CXX_FAST_TLS since the C++-style TLS access functions should be - // nounwind. If we want to generalize this later, we may need to emit - // CFI pseudo-instructions. 
- assert(Entry->getParent()->getFunction()->hasFnAttribute( - Attribute::NoUnwind) && - "Function should be nounwind in insertCopiesSplitCSR!"); - Entry->addLiveIn(*I); - BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR) - .addReg(*I); - - // Insert the copy-back instructions right before the terminator. - for (auto *Exit : Exits) - BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(), - TII->get(TargetOpcode::COPY), *I) - .addReg(NewVR); - } -} Index: lib/Target/X86/X86MachineFunctionInfo.h =================================================================== --- lib/Target/X86/X86MachineFunctionInfo.h +++ lib/Target/X86/X86MachineFunctionInfo.h @@ -92,9 +92,8 @@ /// used to address arguments in a function using a base pointer. int SEHFramePtrSaveIndex = 0; - /// True if this function has a subset of CSRs that is handled explicitly via - /// copies. - bool IsSplitCSR = false; + /// zero-terminated list of callee saved registers saved by copying to a vreg. + const MCPhysReg *CSRSavedViaCopy = nullptr; /// True if this function uses the red zone. bool UsesRedZone = false; @@ -168,8 +167,10 @@ return ForwardedMustTailRegParms; } - bool isSplitCSR() const { return IsSplitCSR; } - void setIsSplitCSR(bool s) { IsSplitCSR = s; } + const MCPhysReg *getCSRSavedViaCopy() const { return CSRSavedViaCopy; } + void setCSRSavedViaCopy(const MCPhysReg *CSRSavedViaCopy) { + this->CSRSavedViaCopy = CSRSavedViaCopy; + } bool getUsesRedZone() const { return UsesRedZone; } void setUsesRedZone(bool V) { UsesRedZone = V; } Index: lib/Target/X86/X86RegisterInfo.h =================================================================== --- lib/Target/X86/X86RegisterInfo.h +++ lib/Target/X86/X86RegisterInfo.h @@ -99,8 +99,6 @@ /// callee-save registers on this target. 
const MCPhysReg * getCalleeSavedRegs(const MachineFunction* MF) const override; - const MCPhysReg * - getCalleeSavedRegsViaCopy(const MachineFunction *MF) const override; const uint32_t *getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override; const uint32_t *getNoPreservedMask() const override; @@ -135,6 +133,9 @@ unsigned getBaseRegister() const { return BasePtr; } // FIXME: Move to FrameInfok unsigned getSlotSize() const { return SlotSize; } + + /// Returns CSR_64_CXX_TLS_Darwin_ViaCopy_SaveList. + static const MCPhysReg *getFastTLSSavedViaCopy(); }; //get512BitRegister - X86 utility - returns 512-bit super register Index: lib/Target/X86/X86RegisterInfo.cpp =================================================================== --- lib/Target/X86/X86RegisterInfo.cpp +++ lib/Target/X86/X86RegisterInfo.cpp @@ -250,7 +250,7 @@ return CSR_64_RT_AllRegs_SaveList; case CallingConv::CXX_FAST_TLS: if (Is64Bit) - return MF->getInfo()->isSplitCSR() ? + return MF->getInfo()->getCSRSavedViaCopy() ? 
CSR_64_CXX_TLS_Darwin_PE_SaveList : CSR_64_TLS_Darwin_SaveList; break; case CallingConv::Intel_OCL_BI: { @@ -310,15 +310,6 @@ return CSR_32_SaveList; } -const MCPhysReg *X86RegisterInfo::getCalleeSavedRegsViaCopy( - const MachineFunction *MF) const { - assert(MF && "Invalid MachineFunction pointer."); - if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS && - MF->getInfo()->isSplitCSR()) - return CSR_64_CXX_TLS_Darwin_ViaCopy_SaveList; - return nullptr; -} - const uint32_t * X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { @@ -655,3 +646,7 @@ return Reg; llvm_unreachable("Unexpected SIMD register"); } + +const MCPhysReg *X86RegisterInfo::getFastTLSSavedViaCopy() { + return CSR_64_CXX_TLS_Darwin_ViaCopy_SaveList; +} Index: test/CodeGen/ARM/cxx-tlscc.ll =================================================================== --- test/CodeGen/ARM/cxx-tlscc.ll +++ test/CodeGen/ARM/cxx-tlscc.ll @@ -26,7 +26,7 @@ ; THUMB-LABEL: _ZTW2sg ; THUMB: push {{.*}}lr ; THUMB: blx -; THUMB: bne [[TH_end:.?LBB0_[0-9]+]] +; THUMB: bne{{.w}} [[TH_end:.?LBB0_[0-9]+]] ; THUMB: blx ; THUMB: tlv_atexit ; THUMB: [[TH_end]]: