Index: include/llvm/CodeGen/TargetPassConfig.h =================================================================== --- include/llvm/CodeGen/TargetPassConfig.h +++ include/llvm/CodeGen/TargetPassConfig.h @@ -16,8 +16,11 @@ #include "llvm/Pass.h" #include "llvm/Support/CodeGen.h" +#include "llvm/Support/CommandLine.h" #include +extern llvm::cl::opt UseIPRA; + namespace llvm { class PassConfigImpl; Index: include/llvm/Target/TargetFrameLowering.h =================================================================== --- include/llvm/Target/TargetFrameLowering.h +++ include/llvm/Target/TargetFrameLowering.h @@ -331,6 +331,20 @@ virtual bool canUseAsEpilogue(const MachineBasicBlock &MBB) const { return true; } + + /// Check if the given function is safe for not having callee saved registers. + /// This is used when interprocedural register allocation is enabled. + static bool isSafeForNoCSROpt(const Function *F) { + if (!F->hasLocalLinkage() || F->hasAddressTaken() || + !F->hasFnAttribute(Attribute::NoRecurse)) + return false; + // No call site of the function may be a tail call. 
+ for (const User *U : F->users()) + if (auto CS = ImmutableCallSite(U)) + if (CS.isTailCall()) + return false; + return true; + } }; } // End llvm namespace Index: lib/CodeGen/RegUsageInfoCollector.cpp =================================================================== --- lib/CodeGen/RegUsageInfoCollector.cpp +++ lib/CodeGen/RegUsageInfoCollector.cpp @@ -17,6 +17,7 @@ /// //===----------------------------------------------------------------------===// +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -26,11 +27,15 @@ #include "llvm/CodeGen/RegisterUsageInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" using namespace llvm; #define DEBUG_TYPE "ip-regalloc" +STATISTIC(NumCSROpt, + "Number of functions optimized for callee saved registers"); + namespace llvm { void initializeRegUsageInfoCollectorPass(PassRegistry &); } @@ -101,6 +106,8 @@ unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32; RegMask.resize(RegMaskSize, 0xFFFFFFFF); + const Function *F = MF.getFunction(); + PhysicalRegisterUsageInfo *PRUI = &getAnalysis(); PRUI->setTargetMachine(&TM); @@ -111,11 +118,17 @@ if (MRI->isPhysRegModified(PReg, true)) markRegClobbered(TRI, &RegMask[0], PReg); - const uint32_t *CallPreservedMask = - TRI->getCallPreservedMask(MF, MF.getFunction()->getCallingConv()); - // Set callee saved register as preserved. - for (unsigned i = 0; i < RegMaskSize; ++i) - RegMask[i] = RegMask[i] | CallPreservedMask[i]; + if (!TargetFrameLowering::isSafeForNoCSROpt(F)) { + const uint32_t *CallPreservedMask = + TRI->getCallPreservedMask(MF, F->getCallingConv()); + // Set callee saved register as preserved. 
+ for (unsigned i = 0; i < RegMaskSize; ++i) + RegMask[i] = RegMask[i] | CallPreservedMask[i]; + } else { + ++NumCSROpt; + DEBUG(dbgs() << MF.getName() + << " function optimized for not having CSR.\n"); + } for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) if (MachineOperand::clobbersPhysReg(&(RegMask[0]), PReg)) @@ -123,7 +136,7 @@ DEBUG(dbgs() << " \n----------------------------------------\n"); - PRUI->storeUpdateRegUsageInfo(MF.getFunction(), std::move(RegMask)); + PRUI->storeUpdateRegUsageInfo(F, std::move(RegMask)); return false; } Index: lib/CodeGen/TargetFrameLoweringImpl.cpp =================================================================== --- lib/CodeGen/TargetFrameLoweringImpl.cpp +++ lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -12,13 +12,14 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/BitVector.h" -#include "llvm/Target/TargetFrameLowering.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Function.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include @@ -59,15 +60,21 @@ void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const { - // Get the callee saved register list... const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); - const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF); // Resize before the early returns. Some backends expect that // SavedRegs.size() == TRI.getNumRegs() after this call even if there are no // saved registers. 
SavedRegs.resize(TRI.getNumRegs()); + // When interprocedural register allocation is enabled caller saved registers + // are preferred over callee saved registers. + if (UseIPRA && isSafeForNoCSROpt(MF.getFunction())) + return; + + // Get the callee saved register list... + const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF); + // Early exit if there are no callee saved registers. if (!CSRegs || CSRegs[0] == 0) return; Index: lib/CodeGen/TargetPassConfig.cpp =================================================================== --- lib/CodeGen/TargetPassConfig.cpp +++ lib/CodeGen/TargetPassConfig.cpp @@ -28,7 +28,6 @@ #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Verifier.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" Index: test/CodeGen/X86/ipra-local-linkage.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/ipra-local-linkage.ll @@ -0,0 +1,30 @@ +; RUN: llc < %s | FileCheck %s -check-prefix=NOIPRA +; RUN: llc -enable-ipra < %s | FileCheck %s + +target triple = "x86_64--" + +define internal void @foo() norecurse { +; When IPRA is not enabled R15 will be saved by foo as it is a callee saved reg. +; NOIPRA-LABEL: foo: +; NOIPRA: pushq %r15 +; When IPRA is enabled no register should be saved as foo() is a local function +; so we optimize it to save no registers. +; CHECK-LABEL: foo: +; CHECK-NOT: pushq %r15 + call void asm sideeffect "movl %r14d, %r15d", "~{r15}"() + ret void +} + +define void @bar(i32 %X) { + call void asm sideeffect "movl %r12d, $0", "{r15}~{r12}"(i32 %X) + ; As R15 is clobbered by foo() when IPRA is enabled, the value of R15 should be + ; saved if the register containing the original value is also getting clobbered + ; and reloaded after foo(); here the original value is loaded back into R15D after + ; the call to foo. 
+ call void @foo() + ; CHECK-LABEL: bar: + ; CHECK: callq foo + ; CHECK-NEXT: movl %eax, %r15d + call void asm sideeffect "movl $0, %r12d", "{r15}~{r12}"(i32 %X) + ret void +}