Index: lib/CodeGen/MachineLICM.cpp =================================================================== --- lib/CodeGen/MachineLICM.cpp +++ lib/CodeGen/MachineLICM.cpp @@ -54,6 +54,12 @@ cl::desc("MachineLICM should hoist even cheap instructions"), cl::init(false), cl::Hidden); +static cl::opt +SinkInstsToAvoidSpills("sink-insts-to-avoid-spills", + cl::desc("MachineLICM should sink instructions into " + "loops to avoid register spills"), + cl::init(false), cl::Hidden); + STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops"); STATISTIC(NumLowRP, @@ -243,6 +249,11 @@ void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode); void HoistRegion(MachineDomTreeNode *N, bool IsHeader); + /// SinkIntoLoop - Sink instructions into loops if profitable. This + /// especially tries to prevent register spills caused by register pressure + /// if there is little to no overhead moving instructions into loops. + void SinkIntoLoop(); + /// getRegisterClassIDAndCost - For a given MI, register, and the operand /// index, return the ID and cost of its representative register class by /// reference. @@ -381,6 +392,9 @@ FirstInLoop = true; HoistOutOfLoop(N); CSEMap.clear(); + + if (SinkInstsToAvoidSpills) + SinkIntoLoop(); } } @@ -771,6 +785,53 @@ } } +void MachineLICM::SinkIntoLoop() { + MachineBasicBlock *Preheader = getCurPreheader(); + if (!Preheader) + return; + + SmallVector Candidates; + for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin(); + I != Preheader->instr_end(); ++I) { + // We need to ensure that we can safely move this instruction into the loop. + // As such, it must have side-effects, e.g. such as a call has. + if (IsLoopInvariantInst(*I)) + Candidates.push_back(I); + } + + for (MachineInstr *I : Candidates) { + const MachineOperand &MO = I->getOperand(0); + if (!MO.isDef() || !MO.isReg() || !MO.getReg()) + continue; + if (!MRI->hasOneDef(MO.getReg())) + continue; + bool CanSink = true; + MachineBasicBlock *B = nullptr; + for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) { + // FIXME: Come up with a proper cost model that estimates whether sinking + // the instruction (and thus possibly executing it on every loop + // iteration) is more expensive than a register. + // For now assumes that copies are cheap and thus almost always worth it. + if (!MI.isCopy()) { + CanSink = false; + break; + } + if (!B) { + B = MI.getParent(); + continue; + } + B = DT->findNearestCommonDominator(B, MI.getParent()); + if (!B) { + CanSink = false; + break; + } + } + if (!CanSink || !B || B == Preheader) + continue; + B->splice(B->getFirstNonPHI(), Preheader, I); + } +} + static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) { return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg()); } Index: test/CodeGen/X86/sink-cheap-instructions.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/sink-cheap-instructions.ll @@ -0,0 +1,62 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=CHECK +; RUN: llc < %s -mtriple=x86_64-linux -sink-insts-to-avoid-spills | FileCheck %s -check-prefix=SINK + +; Ensure that we sink copy-like instructions into loops to avoid register +; spills. + +; CHECK: Spill +; SINK-NOT: Spill + +%struct.A = type { i32, i32, i32, i32, i32, i32 } + +define void @_Z1fPhP1A(i8* nocapture readonly %input, %struct.A* %a) { + %1 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 0 + %2 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 1 + %3 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 2 + %4 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 3 + %5 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 4 + %6 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 5 + br label %.backedge + +.backedge: + %.0 = phi i8* [ %input, %0 ], [ %7, %.backedge.backedge ] + %7 = getelementptr inbounds i8, i8* %.0, i64 1 + %8 = load i8, i8* %7, align 1 + switch i8 %8, label %.backedge.backedge [ + i8 0, label %9 + i8 10, label %10 + i8 20, label %11 + i8 30, label %12 + i8 40, label %13 + i8 50, label %14 + ] + +;