Index: include/llvm/Analysis/CallGraphSCCPass.h
===================================================================
--- include/llvm/Analysis/CallGraphSCCPass.h
+++ include/llvm/Analysis/CallGraphSCCPass.h
@@ -23,6 +23,7 @@
 
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Pass.h"
+#include "llvm/PassSupport.h"
 
 namespace llvm {
 
@@ -111,6 +112,23 @@
   const CallGraph &getCallGraph() { return CG; }
 };
 
-} // End llvm namespace
+void initializeDummyCGSCCPassPass(PassRegistry &);
+
+/// This pass is required by interprocedural register allocation. It forces
+/// codegen to follow bottom up order on the call graph.
+class DummyCGSCCPass : public CallGraphSCCPass {
+public:
+  static char ID;
+  DummyCGSCCPass() : CallGraphSCCPass(ID) {
+    PassRegistry &Registry = *PassRegistry::getPassRegistry();
+    initializeDummyCGSCCPassPass(Registry);
+  }
+  bool runOnSCC(CallGraphSCC &SCC) override { return false; }
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesAll();
+  }
+};
+} // End llvm namespace
+
 
 #endif
Index: include/llvm/CodeGen/MachineOperand.h
===================================================================
--- include/llvm/CodeGen/MachineOperand.h
+++ include/llvm/CodeGen/MachineOperand.h
@@ -534,6 +534,15 @@
     Contents.MBB = MBB;
   }
 
+  /// Sets value of register mask operand referencing RegMaskPtr. The
+  /// operand does not take ownership of the memory referenced by RegMaskPtr,
+  /// it must remain valid for the lifetime of the operand. See CreateRegMask().
+  /// Any physreg with a 0 bit in the mask is clobbered by the instruction.
+  void setRegMask(const uint32_t *RegMaskPtr) {
+    assert(isRegMask() && "Wrong MachineOperand mutator");
+    Contents.RegMask = RegMaskPtr;
+  }
+
   //===--------------------------------------------------------------------===//
   // Other methods.
   //===--------------------------------------------------------------------===//
Index: include/llvm/CodeGen/Passes.h
===================================================================
--- include/llvm/CodeGen/Passes.h
+++ include/llvm/CodeGen/Passes.h
@@ -361,6 +361,10 @@
   /// independently of other lanes and splits them into separate virtual
   /// registers.
   extern char &RenameIndependentSubregsID;
+
+  /// This pass is executed POST-RA to collect which physical registers are
+  /// preserved by a given machine function.
+  FunctionPass *createRegUsageInfoCollector();
 } // End llvm namespace
 
 /// Target machine pass initializer for passes with dependencies. Use with
Index: include/llvm/CodeGen/RegisterUsageInfo.h
===================================================================
--- /dev/null
+++ include/llvm/CodeGen/RegisterUsageInfo.h
@@ -0,0 +1,78 @@
+//==- RegisterUsageInfo.h - Register Usage Information Storage --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This pass is required to take advantage of the interprocedural register
+/// allocation infrastructure.
+///
+/// This is a simple immutable pass which keeps RegMasks (calculated based on
+/// actual register allocation) for functions in a module and provides a simple
+/// API to query this information.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_REGISTERUSAGEINFO_H
+#define LLVM_CODEGEN_REGISTERUSAGEINFO_H
+
+#include "llvm/Pass.h"
+#include "llvm/IR/Module.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+class PhysicalRegisterUsageInfo : public ImmutablePass {
+  virtual void anchor();
+
+public:
+  static char ID;
+
+  PhysicalRegisterUsageInfo() : ImmutablePass(ID) {
+    PassRegistry &Registry = *PassRegistry::getPassRegistry();
+    initializePhysicalRegisterUsageInfoPass(Registry);
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesAll();
+  }
+
+  /// Sets the TargetRegisterInfo * which is used to dump the analysis when the
+  /// command line option -dump-regusage is used.
+  void setTargetRegisterInfo(const TargetRegisterInfo *TRI_) {
+    TRI = TRI_;
+  }
+
+  /// Sets the MCRegisterInfo * which is used to dump the analysis when the
+  /// command line option -dump-regusage is used.
+  void setMCRegisterInfo(const MCRegisterInfo *MCRI_) {
+    MCRI = MCRI_;
+  }
+
+  bool doInitialization(Module &M) override;
+  bool doFinalization(Module &M) override;
+
+  void storeUpdateRegUsageInfo(const GlobalVariable *MFGlobalVar,
+                               std::vector<uint32_t> RegMask);
+
+  const std::vector<uint32_t> *
+  getRegUsageInfo(const GlobalVariable *MFGlobalVar);
+
+private:
+  /// A DenseMap from the GlobalVariable * of a Function to its RegMask.
+  /// In a RegMask, 0 means the register is used (clobbered) by the function
+  /// and 1 means the content of the register is preserved around the call.
+  DenseMap<const GlobalVariable *, std::vector<uint32_t>> RegMasks;
+  const MCRegisterInfo *MCRI = nullptr;
+  const TargetRegisterInfo *TRI = nullptr;
+};
+}
+
+#endif
Index: include/llvm/InitializePasses.h
===================================================================
--- include/llvm/InitializePasses.h
+++ include/llvm/InitializePasses.h
@@ -239,6 +239,7 @@
 void initializePartiallyInlineLibCallsLegacyPassPass(PassRegistry &);
 void initializePEIPass(PassRegistry&);
 void initializePHIEliminationPass(PassRegistry&);
+void initializePhysicalRegisterUsageInfoPass(PassRegistry &);
 void initializePartialInlinerPass(PassRegistry&);
 void initializePeepholeOptimizerPass(PassRegistry&);
 void initializePostDomOnlyPrinterPass(PassRegistry&);
Index: lib/Analysis/CallGraphSCCPass.cpp
===================================================================
--- lib/Analysis/CallGraphSCCPass.cpp
+++ lib/Analysis/CallGraphSCCPass.cpp
@@ -638,3 +638,8 @@
     .getOptBisect()
     .shouldRunPass(this, SCC);
 }
+
+char DummyCGSCCPass::ID = 0;
+INITIALIZE_PASS(DummyCGSCCPass, "DummyCGSCCPass", "DummyCGSCCPass", false,
+                false)
+
Index: lib/CodeGen/CMakeLists.txt
===================================================================
--- lib/CodeGen/CMakeLists.txt
+++ lib/CodeGen/CMakeLists.txt
@@ -101,6 +101,8 @@
   RegisterPressure.cpp
   RegisterScavenging.cpp
   RenameIndependentSubregs.cpp
+  RegisterUsageInfo.cpp
+  RegUsageInfoCollector.cpp
   SafeStack.cpp
   ScheduleDAG.cpp
   ScheduleDAGInstrs.cpp
Index: lib/CodeGen/RegUsageInfoCollector.cpp
===================================================================
--- /dev/null
+++ lib/CodeGen/RegUsageInfoCollector.cpp
@@ -0,0 +1,134 @@
+//===- RegUsageInfoCollector.cpp - Register Usage Information Collector ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// This pass is required to take advantage of the interprocedural register
+/// allocation infrastructure.
+///
+/// This pass is a simple MachineFunction pass which collects register usage
+/// details by iterating through each physical register and checking
+/// MRI::isPhysRegUsed(), then creates a RegMask based on these details.
+/// The pass then stores this RegMask in PhysicalRegisterUsageInfo.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterUsageInfo.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ip-regalloc"
+
+namespace llvm {
+void initializeRegUsageInfoCollectorPass(PassRegistry &);
+}
+
+namespace {
+class RegUsageInfoCollector : public MachineFunctionPass {
+public:
+  RegUsageInfoCollector() : MachineFunctionPass(ID) {
+    PassRegistry &Registry = *PassRegistry::getPassRegistry();
+    initializeRegUsageInfoCollectorPass(Registry);
+  }
+
+  const char *getPassName() const override {
+    return "Register Usage Information Collector Pass";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  static char ID;
+
+private:
+  void markRegClobbered(const TargetRegisterInfo *TRI, uint32_t *RegMask,
+                        unsigned PReg);
+};
+} // end of anonymous namespace
+
+char RegUsageInfoCollector::ID = 0;
+
+INITIALIZE_PASS_BEGIN(RegUsageInfoCollector, "RegUsageInfoCollector",
+                      "Register Usage Information Collector", false, false)
+INITIALIZE_PASS_DEPENDENCY(PhysicalRegisterUsageInfo)
+INITIALIZE_PASS_END(RegUsageInfoCollector, "RegUsageInfoCollector",
+                    "Register Usage Information Collector", false, false)
+
+FunctionPass *llvm::createRegUsageInfoCollector() {
+  return new RegUsageInfoCollector();
+}
+
+void RegUsageInfoCollector::markRegClobbered(const TargetRegisterInfo *TRI,
+                                             uint32_t *RegMask, unsigned PReg) {
+  // If PReg is clobbered then all of its aliases are also clobbered.
+  for (MCRegAliasIterator AI(PReg, TRI, true); AI.isValid(); ++AI) {
+    RegMask[*AI / 32] &= ~(1u << (*AI % 32));
+    DEBUG(dbgs() << *AI << " ");
+  }
+}
+
+void RegUsageInfoCollector::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<PhysicalRegisterUsageInfo>();
+  AU.setPreservesAll();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
+  MachineRegisterInfo *MRI = &MF.getRegInfo();
+  const TargetRegisterInfo *TRI =
+      MF.getSubtarget().getRegisterInfo();
+  const TargetMachine &TM = MF.getTarget();
+  const MCRegisterInfo *MCRI = TM.getMCRegisterInfo();
+
+  DEBUG(dbgs() << " -------------------- " << getPassName()
+               << " ----------------------- \n");
+  DEBUG(dbgs() << "Function Name : " << MF.getName() << "\n");
+
+  std::vector<uint32_t> RegMask;
+
+  // Compute the size of the bit vector to represent all the registers.
+  // The bit vector is broken into 32-bit chunks, thus takes the ceil of
+  // the number of registers divided by 32 for the size.
+  unsigned regMaskSize = (TRI->getNumRegs() + 31) / 32;
+  RegMask.resize(regMaskSize, 0xFFFFFFFF);
+
+  PhysicalRegisterUsageInfo *PRUI = &getAnalysis<PhysicalRegisterUsageInfo>();
+  PRUI->setTargetRegisterInfo(TRI);
+  PRUI->setMCRegisterInfo(MCRI);
+
+  DEBUG(dbgs() << "Clobbered Registers: ");
+  for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
+    if (!MRI->reg_nodbg_empty(PReg) && MRI->isPhysRegUsed(PReg))
+      markRegClobbered(TRI, &RegMask[0], PReg);
+  }
+
+  const uint32_t *CallPreservedMask =
+      TRI->getCallPreservedMask(MF, MF.getFunction()->getCallingConv());
+  // Set callee saved registers as preserved; a target may provide no mask.
+  if (CallPreservedMask)
+    for (unsigned Index = 0; Index < regMaskSize; ++Index)
+      RegMask[Index] |= CallPreservedMask[Index];
+
+  DEBUG(dbgs() << " \n-----------------------------------------------------------"
+                  "------------------ \n");
+
+  const Module *Mdl = MF.getFunction()->getParent();
+  // FIXME: getNamedGlobal() looks up GlobalVariables, not Functions; verify.
+  PRUI->storeUpdateRegUsageInfo(Mdl->getNamedGlobal(MF.getName()),
+                                std::move(RegMask));
+
+  return false;
+}
Index: lib/CodeGen/RegisterUsageInfo.cpp
===================================================================
--- /dev/null
+++ lib/CodeGen/RegisterUsageInfo.cpp
@@ -0,0 +1,65 @@
+//===- RegisterUsageInfo.cpp - Register Usage Information Storage ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// This pass is required to take advantage of the interprocedural register
+/// allocation infrastructure.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Module.h"
+#include "llvm/CodeGen/RegisterUsageInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ip-regalloc"
+
+cl::opt<bool> DumpRegUsage("dump-regusage", cl::init(false), cl::Hidden,
+    cl::desc("Dump register usage details collected for analysis."));
+
+INITIALIZE_PASS(PhysicalRegisterUsageInfo, "reg-usage-info",
+                "Register Usage Information Storage", false, true)
+
+char PhysicalRegisterUsageInfo::ID = 0;
+
+void PhysicalRegisterUsageInfo::anchor() {}
+
+bool PhysicalRegisterUsageInfo::doInitialization(Module &M) {
+  RegMasks.grow(M.size());
+  return false;
+}
+
+bool PhysicalRegisterUsageInfo::doFinalization(Module &M) {
+  if (DumpRegUsage) {
+    for (const auto &Entry : RegMasks) {
+      errs() << "Clobbered Registers: ";
+      for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
+        if (!(Entry.second[PReg / 32] & (1u << (PReg % 32))))
+          errs() << MCRI->getName(PReg) << " ";
+      }
+      errs() << "\n";
+    }
+  }
+
+  RegMasks.shrink_and_clear();
+  return false;
+}
+
+void PhysicalRegisterUsageInfo::storeUpdateRegUsageInfo(
+    const GlobalVariable* MFGlobalVar, std::vector<uint32_t> RegMask) {
+  RegMasks[MFGlobalVar] = std::move(RegMask);
+}
+
+const std::vector<uint32_t> *
+PhysicalRegisterUsageInfo::getRegUsageInfo(const GlobalVariable* MFGlobalVar) {
+  // Look the key up once; a missing entry means no mask was stored for it.
+  auto It = RegMasks.find(MFGlobalVar);
+  return It != RegMasks.end() ? &It->second : nullptr;
+}
Index: lib/CodeGen/TargetPassConfig.cpp
===================================================================
--- lib/CodeGen/TargetPassConfig.cpp
+++ lib/CodeGen/TargetPassConfig.cpp
@@ -15,11 +15,13 @@
 #include "llvm/CodeGen/TargetPassConfig.h"
 
 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/CFLAliasAnalysis.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/ScopedNoAliasAA.h"
 #include "llvm/Analysis/TypeBasedAliasAnalysis.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/RegisterUsageInfo.h"
 #include "llvm/IR/IRPrintingPasses.h"
 #include "llvm/IR/LegacyPassManager.h"
@@ -112,6 +114,11 @@
     cl::init(false), cl::Hidden,
     cl::desc("Enable the new, experimental CFL alias analysis in CodeGen"));
 
+cl::opt<bool>
+    UseIPRA("enable-ipra", cl::init(false), cl::Hidden,
+            cl::desc("Enable interprocedural register allocation "
+                     "to reduce load/store at procedure calls."));
+
 /// Allow standard passes to be disabled by command line options. This supports
 /// simple binary flags that either suppress the pass or do nothing.
 /// i.e. -disable-mypass=false has no effect.
@@ -492,6 +499,11 @@
 void TargetPassConfig::addISelPrepare() {
   addPreISel();
 
+  if (UseIPRA) {
+    // Force codegen to run according to the callgraph.
+    addPass(new DummyCGSCCPass);
+  }
+
   // Add both the safe stack and the stack protection passes: each of them will
   // only protect functions that have corresponding attributes.
   addPass(createSafeStackPass(TM));
@@ -612,7 +624,13 @@
     addBlockPlacement();
 
   addPreEmitPass();
-  
+
+  if (UseIPRA) {
+    // Collect register usage information and produce a register mask of
+    // clobbered registers, to be used to optimize call sites.
+    addPass(createRegUsageInfoCollector());
+  }
+
   addPass(&FuncletLayoutID, false);
 
   addPass(&StackMapLivenessID, false);
Index: test/CodeGen/Generic/reg-usage-info.ll
===================================================================
--- /dev/null
+++ test/CodeGen/Generic/reg-usage-info.ll
@@ -0,0 +1,54 @@
+; RUN: llc -enable-ipra -dump-regusage -o /dev/null 2>&1 < %s | FileCheck %s
+; CHECK: Clobbered Registers: AH AL AX DI DIL EAX EDI EFLAGS ESP RAX RDI RSP SP SPL
+
+
+target triple = "x86_64-apple-macosx10.11.0"
+
+; Function Attrs: nounwind ssp uwtable
+define i32 @fib(i32 %n) #0 {
+entry:
+  %retval = alloca i32, align 4
+  %n.addr = alloca i32, align 4
+  store i32 %n, i32* %n.addr, align 4
+  %0 = load i32, i32* %n.addr, align 4
+  %cmp = icmp eq i32 %0, 1
+  br i1 %cmp, label %if.then, label %lor.lhs.false
+
+lor.lhs.false:                                    ; preds = %entry
+  %1 = load i32, i32* %n.addr, align 4
+  %cmp1 = icmp eq i32 %1, 2
+  br i1 %cmp1, label %if.then, label %if.end
+
+if.then:                                          ; preds = %lor.lhs.false, %entry
+  store i32 1, i32* %retval, align 4
+  br label %return
+
+if.end:                                           ; preds = %lor.lhs.false
+  %2 = load i32, i32* %n.addr, align 4
+  %sub = sub nsw i32 %2, 1
+  %call = call i32 @fib(i32 %sub)
+  %3 = load i32, i32* %n.addr, align 4
+  %sub2 = sub nsw i32 %3, 2
+  %call3 = call i32 @fib(i32 %sub2)
+  %add = add nsw i32 %call, %call3
+  store i32 %add, i32* %retval, align 4
+  br label %return
+
+return:                                           ; preds = %if.end, %if.then
+  %4 = load i32, i32* %retval, align 4
+  ret i32 %4
+}
+
+; Function Attrs: nounwind ssp uwtable
+define i32 @main() #0 {
+entry:
+  %retval = alloca i32, align 4
+  %n = alloca i32, align 4
+  store i32 0, i32* %retval, align 4
+  store i32 10, i32* %n, align 4
+  %0 = load i32, i32* %n, align 4
+  %call = call i32 @fib(i32 %0)
+  ret i32 %call
+}
+
+attributes #0 = { nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }