Index: lib/CodeGen/MachineLICM.cpp =================================================================== --- lib/CodeGen/MachineLICM.cpp +++ lib/CodeGen/MachineLICM.cpp @@ -25,6 +25,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -32,6 +33,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -54,11 +56,17 @@ cl::desc("MachineLICM should hoist even cheap instructions"), cl::init(false), cl::Hidden); -static cl::opt -SinkInstsToAvoidSpills("sink-insts-to-avoid-spills", - cl::desc("MachineLICM should sink instructions into " - "loops to avoid register spills"), - cl::init(false), cl::Hidden); +static cl::opt SinkInstsToAvoidSpills( + "sink-insts-to-avoid-spills", + cl::desc("MachineLICM should sink instructions into loops to avoid " + "register spills"), + cl::init(false), cl::Hidden); + +static cl::opt SinkThresholdDenominator( + "sink-threshold-denominator", + cl::desc("Sink instrutions into the loop only if their probability of " + "being executed in each iteration is less than 1/N"), + cl::init(5), cl::Hidden); STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops"); @@ -76,6 +84,7 @@ const TargetInstrInfo *TII; const TargetLoweringBase *TLI; const TargetRegisterInfo *TRI; + const MachineBlockFrequencyInfo *MBFI; const MachineFrameInfo *MFI; MachineRegisterInfo *MRI; const InstrItineraryData *InstrItins; @@ -140,6 +149,7 @@ bool runOnMachineFunction(MachineFunction &MF) override; void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); @@ -312,6 +322,7 @@ char &llvm::MachineLICMID = MachineLICM::ID; INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm", "Machine Loop Invariant Code Motion", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) @@ -340,6 +351,7 @@ TII = MF.getSubtarget().getInstrInfo(); TLI = MF.getSubtarget().getTargetLowering(); TRI = MF.getSubtarget().getRegisterInfo(); + MBFI = &getAnalysis(); MFI = MF.getFrameInfo(); MRI = &MF.getRegInfo(); InstrItins = MF.getSubtarget().getInstrItineraryData(); @@ -790,6 +802,15 @@ if (!Preheader) return; + // We are sinking instructions into the loop only if they aren't 'likely' to + // be executed during every iteration. + // FIXME: Instead of having a fix threshold, the probability could be a + // factor of the a cost model. + const BranchProbability ColdProb(1, 5); + const BlockFrequency ThresholdFreq = + MBFI->getBlockFreq(CurLoop->getHeader()) * + BranchProbability(1, SinkThresholdDenominator); + SmallVector Candidates; for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin(); I != Preheader->instr_end(); ++I) { @@ -805,29 +826,23 @@ continue; if (!MRI->hasOneDef(MO.getReg())) continue; - bool CanSink = true; MachineBasicBlock *B = nullptr; for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) { - // FIXME: Come up with a proper cost model that estimates whether sinking - // the instruction (and thus possibly executing it on every loop - // iteration) is more expensive than a register. - // For now assumes that copies are cheap and thus almost always worth it. - if (!MI.isCopy()) { - CanSink = false; + if (MI.isPHI()) { + B = nullptr; break; } - if (!B) { - B = MI.getParent(); - continue; - } - B = DT->findNearestCommonDominator(B, MI.getParent()); - if (!B) { - CanSink = false; + B = B ? DT->findNearestCommonDominator(B, MI.getParent()) + : MI.getParent(); + if (!B) break; - } } - if (!CanSink || !B || B == Preheader) + if (!B || B == Preheader) + continue; + + if (MBFI->getBlockFreq(B) > ThresholdFreq) continue; + B->splice(B->getFirstNonPHI(), Preheader, I); } } Index: test/CodeGen/X86/sink-cheap-instructions.ll =================================================================== --- test/CodeGen/X86/sink-cheap-instructions.ll +++ test/CodeGen/X86/sink-cheap-instructions.ll @@ -6,6 +6,8 @@ ; CHECK: Spill ; SINK-NOT: Spill +; SINK: lea{{.*}}20 +; SINK-NEXT: jmp %struct.A = type { i32, i32, i32, i32, i32, i32 } @@ -20,6 +22,7 @@ .backedge: %.0 = phi i8* [ %input, %0 ], [ %7, %.backedge.backedge ] + tail call void @_Z6assignPj(i32* %6) %7 = getelementptr inbounds i8, i8* %.0, i64 1 %8 = load i8, i8* %7, align 1 switch i8 %8, label %.backedge.backedge [ @@ -28,7 +31,6 @@ i8 20, label %11 i8 30, label %12 i8 40, label %13 - i8 50, label %14 ] ;