diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h --- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h +++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h @@ -366,7 +366,7 @@ /// Check if given function is safe for not having callee saved registers. /// This is used when interprocedural register allocation is enabled. - static bool isSafeForNoCSROpt(const Function &F) { + virtual bool enableNoCSROpt(const Function &F) const { if (!F.hasLocalLinkage() || F.hasAddressTaken() || !F.hasFnAttribute(Attribute::NoRecurse)) return false; diff --git a/llvm/lib/CodeGen/RegUsageInfoCollector.cpp b/llvm/lib/CodeGen/RegUsageInfoCollector.cpp --- a/llvm/lib/CodeGen/RegUsageInfoCollector.cpp +++ b/llvm/lib/CodeGen/RegUsageInfoCollector.cpp @@ -172,7 +172,7 @@ SetRegAsDefined(PReg); } - if (TargetFrameLowering::isSafeForNoCSROpt(F)) { + if (MF.getSubtarget().getFrameLowering()->enableNoCSROpt(F)) { ++NumCSROpt; LLVM_DEBUG(dbgs() << MF.getName() << " function optimized for not having CSR.\n"); diff --git a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp --- a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -71,7 +71,7 @@ // When interprocedural register allocation is enabled caller saved registers // are preferred over callee saved registers. - if (MF.getTarget().Options.EnableIPRA && isSafeForNoCSROpt(MF.getFunction())) + if (MF.getTarget().Options.EnableIPRA && enableNoCSROpt(MF.getFunction())) return; // Get the callee saved register list... 
@@ -125,4 +125,4 @@ unsigned TargetFrameLowering::getInitialCFARegister(const MachineFunction &MF) const { llvm_unreachable("getInitialCFARegister() not implemented!"); -} \ No newline at end of file +} diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -811,7 +811,7 @@ static bool callWaitsOnFunctionEntry(const MachineInstr &MI) { // Currently all conventions wait, but this may not always be the case. // - // TODO: If IPRA is enabled, and the callee is isSafeForNoCSROpt, it may make + // TODO: If IPRA is enabled and the callee passes enableNoCSROpt, it may make // senses to omit the wait and do it in the caller. return true; } diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.h b/llvm/lib/Target/ARM/ARMFrameLowering.h --- a/llvm/lib/Target/ARM/ARMFrameLowering.h +++ b/llvm/lib/Target/ARM/ARMFrameLowering.h @@ -63,6 +63,11 @@ bool enableShrinkWrapping(const MachineFunction &MF) const override { return true; } + bool enableNoCSROpt(const Function &F) const override { + // The no-CSR optimisation is bad for code size on ARM, because we can save + // many registers with a single PUSH/POP pair. + return false; + } private: void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, diff --git a/llvm/test/CodeGen/ARM/ipra-no-csr.ll b/llvm/test/CodeGen/ARM/ipra-no-csr.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/ARM/ipra-no-csr.ll @@ -0,0 +1,22 @@ +; RUN: llc -mtriple armv7a--none-eabi < %s | FileCheck %s +; RUN: llc -mtriple armv7a--none-eabi < %s -enable-ipra | FileCheck %s + +; Other targets disable callee-saved registers for internal functions when +; using IPRA, but that isn't profitable for ARM because the PUSH/POP +; instructions can more efficiently save registers than using individual +; LDR/STRs in the caller. 
+ +define internal void @callee() norecurse { +; CHECK-LABEL: callee: +entry: +; CHECK: push {r4, lr} +; CHECK: pop {r4, pc} + tail call void asm sideeffect "", "~{r4}"() + ret void +} + +define void @caller() { +entry: + call void @callee() + ret void +}