Index: include/llvm/CodeGen/MachineFunction.h =================================================================== --- include/llvm/CodeGen/MachineFunction.h +++ include/llvm/CodeGen/MachineFunction.h @@ -556,6 +556,11 @@ OperandRecycler.deallocate(Cap, Array); } + /// Return allocator for data with the lifetime of the MachineFunction. + BumpPtrAllocator &getAllocator() { + return Allocator; + } + /// \brief Allocate and initialize a register mask with @p NumRegister bits. uint32_t *allocateRegisterMask(unsigned NumRegister) { unsigned Size = (NumRegister + 31) / 32; Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2519,6 +2519,11 @@ assert(!Res && "Call operand has unhandled type"); (void)Res; } + + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + const uint32_t *PreservedMask = TRI.getCallPreservedMask(MF, CallConv); + BitVector ParametersInCalleeSaveRegs; + assert(ArgLocs.size() == Ins.size()); SmallVector<SDValue, 16> ArgValues; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { @@ -2563,8 +2568,15 @@ else llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); + MCPhysReg PhysReg = VA.getLocReg(); + if (!MachineOperand::clobbersPhysReg(PreservedMask, PhysReg)) { + if (ParametersInCalleeSaveRegs.size() == 0) + ParametersInCalleeSaveRegs.resize(TRI.getNumRegs()); + ParametersInCalleeSaveRegs.set(PhysReg); + } + // Transform the arguments in physical registers into virtual ones. - unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); + unsigned Reg = MF.addLiveIn(PhysReg, RC); ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT); // If this is an 8, 16 or 32-bit value, it is really passed promoted @@ -2672,15 +2684,49 @@ // much is there while considering tail calls (because we can reuse it). 
FuncInfo->setBytesInStackArgArea(StackArgSize); - // Should we save callee save registers via vregs? - if (CallConv == CallingConv::CXX_FAST_TLS && + const MCPhysReg *CalleeSavedRegs = + AArch64RegisterInfo::getBasicCalleeSavedRegs(&MF); + const MCPhysReg *SavedViaCopy = nullptr; + + // Should we save some callee saved registers in vregs? + if ((CallConv == CallingConv::CXX_FAST_TLS || + ParametersInCalleeSaveRegs.size() > 0) && MF.getFunction()->hasFnAttribute(Attribute::NoUnwind) && !MF.getTarget().Options.EnableFastISel && mayUseSplitCSR(MF)) { - const MCPhysReg *SavedViaCopy = - AArch64RegisterInfo::getFastTLSSavedViaCopy(); + if (CallConv == CallingConv::CXX_FAST_TLS) { + SavedViaCopy = AArch64RegisterInfo::getFastTLSSavedViaCopy(); + CalleeSavedRegs = AArch64RegisterInfo::getFastTLSSaved(); + } else { + assert(ParametersInCalleeSaveRegs.size() > 0 && "must have csr params"); + // Split callee saved registers into SavedViaCopy and the rest. + unsigned NCalleeSavedRegs = 0; + for (const MCPhysReg *CSR = CalleeSavedRegs; *CSR != 0; ++CSR) + ++NCalleeSavedRegs; + unsigned NParamsInCSR = ParametersInCalleeSaveRegs.count(); + + MCPhysReg *NewCalleeSavedRegs = + MF.getAllocator().Allocate<MCPhysReg>(NCalleeSavedRegs-NParamsInCSR+1); + MCPhysReg *NewSavedViaCopy = + MF.getAllocator().Allocate<MCPhysReg>(NParamsInCSR+1); + unsigned C0 = 0; + unsigned C1 = 0; + for (const MCPhysReg *CSR = CalleeSavedRegs; *CSR != 0; ++CSR) { + MCPhysReg Reg = *CSR; + if (ParametersInCalleeSaveRegs[Reg]) + NewSavedViaCopy[C0++] = Reg; + else + NewCalleeSavedRegs[C1++] = Reg; + } + NewSavedViaCopy[C0++] = 0; + NewCalleeSavedRegs[C1++] = 0; + assert(C0 == NParamsInCSR+1 && "Correct register count"); + assert(C1 == NCalleeSavedRegs-NParamsInCSR+1 && "Correct register count"); + CalleeSavedRegs = NewCalleeSavedRegs; + SavedViaCopy = NewSavedViaCopy; + } FuncInfo->setCSRSavedViaCopy(SavedViaCopy); - // Add live-ins. + // Add live-ins for SavedViaCopy. 
for (const MCPhysReg *I = SavedViaCopy; *I; ++I) { const MCPhysReg Reg = *I; const TargetRegisterClass *RC; @@ -2693,6 +2739,7 @@ MF.addLiveIn(Reg, RC); } } + FuncInfo->setCalleeSavedRegs(CalleeSavedRegs); return Chain; } Index: lib/Target/AArch64/AArch64MachineFunctionInfo.h =================================================================== --- lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -82,6 +82,8 @@ /// frame is unknown at compile time. e.g., in case of VLAs. bool StackRealigned; + const MCPhysReg *CalleeSavedRegs = nullptr; + public: AArch64FunctionInfo() : BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false), @@ -142,6 +144,9 @@ unsigned getVarArgsFPRSize() const { return VarArgsFPRSize; } void setVarArgsFPRSize(unsigned Size) { VarArgsFPRSize = Size; } + const MCPhysReg *getCalleeSavedRegs() const { return CalleeSavedRegs; } + void setCalleeSavedRegs(const MCPhysReg *CSR) { CalleeSavedRegs = CSR; } + typedef SmallPtrSet<const MachineInstr *, 16> SetOfInstructions; const SetOfInstructions &getLOHRelated() const { return LOHRelated; } Index: lib/Target/AArch64/AArch64RegisterInfo.h =================================================================== --- lib/Target/AArch64/AArch64RegisterInfo.h +++ lib/Target/AArch64/AArch64RegisterInfo.h @@ -33,11 +33,17 @@ bool isReservedReg(const MachineFunction &MF, unsigned Reg) const; - /// Code Generation virtual methods... + /// Return a zero terminated list of callee saved registers for function \p + /// MF. This does not include callee saved registers handled by the SplitCSR + /// mechanism \see getCalleeSavedRegsViaCopy(). const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; const uint32_t *getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override; + /// Return a zero terminated list of callee saved registers for function + /// \p MF, also includes registers handled by SplitCSR. 
+ static const MCPhysReg *getBasicCalleeSavedRegs(const MachineFunction *MF); + unsigned getCSRFirstUseCost() const override { // The cost will be compared against BlockFrequency where entry has the // value of 1 << 14. A value of 5 will choose to spill or split really @@ -96,6 +102,8 @@ /// Returns CSR_AArch64_CXX_TLS_Darwin_ViaCopy_SaveList. static const MCPhysReg *getFastTLSSavedViaCopy(); + /// Returns CSR_AArch64_CXX_TLS_Darwin_PE_SaveList. + static const MCPhysReg *getFastTLSSaved(); }; } // end namespace llvm Index: lib/Target/AArch64/AArch64RegisterInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64RegisterInfo.cpp +++ lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -39,7 +39,7 @@ : AArch64GenRegisterInfo(AArch64::LR), TT(TT) {} const MCPhysReg * -AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { +AArch64RegisterInfo::getBasicCalleeSavedRegs(const MachineFunction *MF) { assert(MF && "Invalid MachineFunction pointer."); if (MF->getFunction()->getCallingConv() == CallingConv::GHC) // GHC set of callee saved regs is empty as all those regs are // caller saved. @@ -48,9 +48,7 @@ if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg) return CSR_AArch64_AllRegs_SaveList; if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS) - return MF->getInfo<AArch64FunctionInfo>()->getCSRSavedViaCopy() ? 
- CSR_AArch64_CXX_TLS_Darwin_PE_SaveList : - CSR_AArch64_CXX_TLS_Darwin_SaveList; + return CSR_AArch64_CXX_TLS_Darwin_SaveList; if (MF->getSubtarget<AArch64Subtarget>().getTargetLowering() ->supportSwiftError() && MF->getFunction()->getAttributes().hasAttrSomewhere( @@ -62,10 +60,23 @@ return CSR_AArch64_AAPCS_SaveList; } +const MCPhysReg * +AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + const AArch64FunctionInfo &FuncInfo = *MF->getInfo<AArch64FunctionInfo>(); + const MCPhysReg *CalleeSavedRegs = FuncInfo.getCalleeSavedRegs(); + if (CalleeSavedRegs) + return CalleeSavedRegs; + return getBasicCalleeSavedRegs(MF); +} + const MCPhysReg *AArch64RegisterInfo::getFastTLSSavedViaCopy() { return CSR_AArch64_CXX_TLS_Darwin_ViaCopy_SaveList; } +const MCPhysReg *AArch64RegisterInfo::getFastTLSSaved() { + return CSR_AArch64_CXX_TLS_Darwin_PE_SaveList; +} + const uint32_t * AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { Index: test/CodeGen/AArch64/swiftself.ll =================================================================== --- test/CodeGen/AArch64/swiftself.ll +++ test/CodeGen/AArch64/swiftself.ll @@ -20,6 +20,19 @@ ret i8 *%res } +declare void @func(i8 *) + +; CHECK-LABEL: swiftself_call_normal: +; OPT-NOT: {{stp|str}} {{.*}}x19 +; OPT: mov x0, x19 +; OPT-NEXT: bl {{_?}}func +; OPT-NOT: {{ldp|ldr}} {{.*}}x19 +; OPT: ret +define void @swiftself_call_normal(i8* swiftself %addr0) nounwind { + call void @func(i8 *%addr0) + ret void +} + ; x19 should be saved by the callee even if used for swiftself ; CHECK-LABEL: swiftself_clobber: ; CHECK: {{stp|str}} {{.*}}x19{{.*}}sp @@ -34,6 +47,7 @@ ; Demonstrate that we do not need any movs when calling multiple functions ; with swiftself argument. 
; CHECK-LABEL: swiftself_passthrough: +; OPT-NOT: stp {{.*}}x19 ; OPT: bl {{_?}}swiftself_param ; OPT-NEXT: bl {{_?}}swiftself_param ; OPT: ret @@ -47,7 +61,7 @@ ; CHECK-LABEL: swiftself_tail: ; OPT: b {{_?}}swiftself_param ; OPT-NOT: ret -define i8* @swiftself_tail(i8* swiftself %addr0) { +define i8* @swiftself_tail(i8* swiftself %addr0) nounwind { call void asm sideeffect "", "~{x19}"() %res = tail call i8* @swiftself_param(i8* swiftself %addr0) ret i8* %res