Index: include/llvm/CodeGen/TargetPassConfig.h
===================================================================
--- include/llvm/CodeGen/TargetPassConfig.h
+++ include/llvm/CodeGen/TargetPassConfig.h
@@ -16,8 +16,13 @@
 
 #include "llvm/Pass.h"
 #include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
 #include <string>
 
+/// Flag consulted by TargetFrameLowering::determineCalleeSaves to decide
+/// whether the IPRA callee-saved-register optimization may be applied.
+extern llvm::cl::opt<bool> UseIPRA;
+
 namespace llvm {
 
 class PassConfigImpl;
Index: include/llvm/Target/TargetFrameLowering.h
===================================================================
--- include/llvm/Target/TargetFrameLowering.h
+++ include/llvm/Target/TargetFrameLowering.h
@@ -331,6 +331,27 @@
   virtual bool canUseAsEpilogue(const MachineBasicBlock &MBB) const {
     return true;
   }
+
+  /// Check if given function is safe for not having callee saved registers.
+  /// This is used when interprocedural register allocation is enabled.
+  ///
+  /// \returns true only if \p F has local linkage, never has its address
+  /// taken, is marked norecurse, and is not the callee of any tail call.
+  static bool isSafeForNoCSROpt(const Function *F) {
+    if (!F->hasLocalLinkage() || F->hasAddressTaken() ||
+        !F->hasFnAttribute(Attribute::NoRecurse))
+      return false;
+
+    // The function must not be the target of a tail call. Its address is
+    // never taken (checked above), so scanning its users visits every call
+    // site without walking the whole module.
+    for (const User *U : F->users())
+      if (auto CS = ImmutableCallSite(U))
+        if (CS.isTailCall())
+          return false;
+
+    return true;
+  }
 };
 
 } // End llvm namespace
Index: lib/CodeGen/RegUsageInfoCollector.cpp
===================================================================
--- lib/CodeGen/RegUsageInfoCollector.cpp
+++ lib/CodeGen/RegUsageInfoCollector.cpp
@@ -17,6 +17,7 @@
 ///
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -26,11 +27,15 @@
 #include "llvm/CodeGen/RegisterUsageInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
 
 using namespace llvm;
 
 #define DEBUG_TYPE "ip-regalloc"
 
+STATISTIC(NumCSROpt,
+          "Number of functions optimized for callee saved registers");
+
 namespace llvm {
 void initializeRegUsageInfoCollectorPass(PassRegistry &);
 }
@@ -101,6 +106,8 @@
   unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32;
   RegMask.resize(RegMaskSize, 0xFFFFFFFF);
 
+  const Function *F = MF.getFunction();
+
   PhysicalRegisterUsageInfo *PRUI = &getAnalysis<PhysicalRegisterUsageInfo>();
 
   PRUI->setTargetMachine(&TM);
@@ -111,11 +118,18 @@
     if (MRI->isPhysRegModified(PReg, true))
       markRegClobbered(TRI, &RegMask[0], PReg);
 
-  const uint32_t *CallPreservedMask =
-      TRI->getCallPreservedMask(MF, MF.getFunction()->getCallingConv());
-  // Set callee saved register as preserved.
-  for (unsigned i = 0; i < RegMaskSize; ++i)
-    RegMask[i] = RegMask[i] | CallPreservedMask[i];
+  if (!TargetFrameLowering::isSafeForNoCSROpt(F)) {
+    const uint32_t *CallPreservedMask =
+        TRI->getCallPreservedMask(MF, F->getCallingConv());
+    // Set callee saved register as preserved.
+    for (unsigned i = 0; i < RegMaskSize; ++i)
+      RegMask[i] = RegMask[i] | CallPreservedMask[i];
+  } else {
+    // The function qualifies for the no-CSR optimization, so the
+    // calling-convention preserved mask is not merged into its RegMask.
+    ++NumCSROpt;
+    DEBUG(dbgs() << "Function optimized for not having CSR.\n");
+  }
 
   for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg)
     if (MachineOperand::clobbersPhysReg(&(RegMask[0]), PReg))
Index: lib/CodeGen/TargetFrameLoweringImpl.cpp
===================================================================
--- lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -12,13 +12,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/BitVector.h"
-#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/Function.h"
+#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
 #include <cstdlib>
@@ -59,15 +60,24 @@
 void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                                BitVector &SavedRegs,
                                                RegScavenger *RS) const {
-  // Get the callee saved register list...
   const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
-  const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF);
 
   // Resize before the early returns. Some backends expect that
   // SavedRegs.size() == TRI.getNumRegs() after this call even if there are no
   // saved registers.
   SavedRegs.resize(TRI.getNumRegs());
 
+  // When interprocedural register allocation is enabled caller saved registers
+  // are preferred over callee saved registers.
+  if (UseIPRA) {
+    const Function *F = MF.getFunction();
+    if (isSafeForNoCSROpt(F))
+      return;
+  }
+
+  // Get the callee saved register list...
+  const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF);
+
   // Early exit if there are no callee saved registers.
   if (!CSRegs || CSRegs[0] == 0)
     return;
Index: lib/CodeGen/TargetPassConfig.cpp
===================================================================
--- lib/CodeGen/TargetPassConfig.cpp
+++ lib/CodeGen/TargetPassConfig.cpp
@@ -27,7 +27,6 @@
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/Verifier.h"
 #include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
Index: test/CodeGen/X86/ipra-local-linkage.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/ipra-local-linkage.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s | FileCheck %s -check-prefix=NOIPRA
+; RUN: llc -enable-ipra < %s | FileCheck %s
+
+target triple = "x86_64--"
+
+define internal void @foo() norecurse #0 {
+; When IPRA is not enabled R15 will be saved by foo as it is callee saved reg.
+; NOIPRA-LABEL: foo:
+; NOIPRA: pushq %r15
+; When IPRA is enabled no register should be saved as foo() is a local function
+; so we optimize it to save no registers.
+; CHECK-LABEL: foo:
+; CHECK-NOT: pushq %r15
+  call void asm sideeffect "movl %r14d, %r15d", "~{r15}"() #0
+  ret void
+}
+
+define void @bar() #0 {
+  %X = add i32 1, 3
+  call void asm sideeffect "movl %r12d, $0", "{r15}~{r12}"(i32 %X) #0
+  ; As R15 is clobbered by foo() when IPRA is enabled value of R15 should be
+  ; reloaded after foo()
+  call void @foo()
+  ; CHECK-LABEL: bar:
+  ; CHECK: callq foo
+  ; CHECK-NEXT: movl $4, %r15d
+  call void asm sideeffect "movl $0, %r12d", "{r15}~{r12}"(i32 %X) #0
+  ret void
+}