diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.h b/llvm/lib/Target/ARM/Thumb1InstrInfo.h
--- a/llvm/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.h
@@ -53,6 +53,15 @@
                             const TargetRegisterInfo *TRI) const override;
 
   bool canCopyGluedNodeDuringSchedule(SDNode *N) const override;
+
+protected:
+  /// Fold a constant-pool load of a callee address into a tBLXr, producing a
+  /// direct tBL. Returns nullptr when no fold is performed.
+  MachineInstr *foldMemoryOperandImpl(
+      MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
+      MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI,
+      LiveIntervals *LIS = nullptr) const override;
+
 private:
   void expandLoadStackGuard(MachineBasicBlock::iterator MI) const override;
 };
diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
--- a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -152,3 +152,44 @@
 
   return false;
 }
+
+/// Try to fold a constant-pool load of a callee address into an indirect call.
+///
+/// Rewrites
+///    ldr Rd, func address
+///    blx Rd
+/// into
+///    bl func
+/// when both instructions are in the same basic block. Returns the newly built
+/// tBL, or nullptr when the pattern does not match and nothing was folded.
+MachineInstr *Thumb1InstrInfo::foldMemoryOperandImpl(
+    MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
+    MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI,
+    LiveIntervals *LIS) const {
+  if (MI.getOpcode() != ARM::tBLXr || LoadMI.getOpcode() != ARM::tLDRpci ||
+      MI.getParent() != LoadMI.getParent())
+    return nullptr;
+
+  unsigned CPI = LoadMI.getOperand(1).getIndex();
+  const MachineConstantPool *MCP = MF.getConstantPool();
+  const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI];
+  // Val.ConstVal is only read for plain constant entries. An assert would
+  // vanish in release builds, so decline to fold target-specific entries.
+  if (CPE.isMachineConstantPoolEntry())
+    return nullptr;
+
+  // Only a named global can become the target of a direct call.
+  const auto *Callee = dyn_cast<GlobalValue>(CPE.Val.ConstVal);
+  if (!Callee)
+    return nullptr;
+
+  const char *FuncName = MF.createExternalSymbolName(Callee->getName());
+  MachineInstrBuilder MIB =
+      BuildMI(*MI.getParent(), InsertPt, MI.getDebugLoc(), get(ARM::tBL))
+          .add(predOps(ARMCC::AL))
+          .addExternalSymbol(FuncName);
+  // Carry over the implicit operands of the original call.
+  for (auto &MO : MI.implicit_operands())
+    MIB.add(MO);
+  return MIB.getInstr();
+}
diff --git a/llvm/test/CodeGen/ARM/minsize-call-cse-2.ll b/llvm/test/CodeGen/ARM/minsize-call-cse-2.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/minsize-call-cse-2.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv6m-arm-none-eabi"
+
+; CHECK-LABEL: f:
+; CHECK: bl g
+; CHECK: blx r
+; CHECK: bl g
+; CHECK: bl g
+define void @f(i32* %p, i32 %x, i32 %y, i32 %z) minsize optsize {
+entry:
+  call void @g(i32* %p, i32 %x, i32 %y, i32 %z)
+  call void @g(i32* %p, i32 %x, i32 %y, i32 %z)
+  call void @g(i32* %p, i32 %x, i32 %y, i32 %z)
+  call void @g(i32* %p, i32 %x, i32 %y, i32 %z)
+  ret void
+}
+
+declare void @g(i32*,i32,i32,i32)