Index: llvm/trunk/include/llvm/MC/MCSchedule.h
===================================================================
--- llvm/trunk/include/llvm/MC/MCSchedule.h
+++ llvm/trunk/include/llvm/MC/MCSchedule.h
@@ -186,6 +186,8 @@
   // takes to recover from a branch misprediction.
   unsigned MispredictPenalty;
   static const unsigned DefaultMispredictPenalty = 10;
+
+  bool PostRAScheduler; // default value is false
 
   bool CompleteModel;
 
@@ -210,7 +212,8 @@
     LoadLatency(DefaultLoadLatency),
     HighLatency(DefaultHighLatency),
     MispredictPenalty(DefaultMispredictPenalty),
-    CompleteModel(true), ProcID(0), ProcResourceTable(nullptr),
+    PostRAScheduler(false), CompleteModel(true),
+    ProcID(0), ProcResourceTable(nullptr),
     SchedClassTable(nullptr), NumProcResourceKinds(0),
     NumSchedClasses(0), InstrItineraries(nullptr) {
     (void)NumProcResourceKinds;
@@ -219,12 +222,13 @@
 
   // Table-gen driven ctor.
   MCSchedModel(unsigned iw, int mbs, int lmbs, unsigned ll, unsigned hl,
-               unsigned mp, bool cm, unsigned pi, const MCProcResourceDesc *pr,
-               const MCSchedClassDesc *sc, unsigned npr, unsigned nsc,
-               const InstrItinerary *ii):
+               unsigned mp, bool postRASched, bool cm, unsigned pi,
+               const MCProcResourceDesc *pr, const MCSchedClassDesc *sc,
+               unsigned npr, unsigned nsc, const InstrItinerary *ii):
     IssueWidth(iw), MicroOpBufferSize(mbs), LoopMicroOpBufferSize(lmbs),
     LoadLatency(ll), HighLatency(hl),
-    MispredictPenalty(mp), CompleteModel(cm), ProcID(pi),
+    MispredictPenalty(mp), PostRAScheduler(postRASched),
+    CompleteModel(cm), ProcID(pi),
     ProcResourceTable(pr), SchedClassTable(sc), NumProcResourceKinds(npr),
     NumSchedClasses(nsc), InstrItineraries(ii) {}
Index: llvm/trunk/include/llvm/Target/TargetSchedule.td
===================================================================
--- llvm/trunk/include/llvm/Target/TargetSchedule.td
+++ llvm/trunk/include/llvm/Target/TargetSchedule.td
@@ -88,6 +88,8 @@
   // Per-cycle resources tables.
   ProcessorItineraries Itineraries = NoItineraries;
 
+  bit PostRAScheduler = 0; // Enable Post RegAlloc Scheduler pass.
+
   // Subtargets that define a model for only a subset of instructions
   // that have a scheduling class (itinerary class or SchedRW list)
   // and may actually be generated for that subtarget must clear this
Index: llvm/trunk/include/llvm/Target/TargetSubtargetInfo.h
===================================================================
--- llvm/trunk/include/llvm/Target/TargetSubtargetInfo.h
+++ llvm/trunk/include/llvm/Target/TargetSubtargetInfo.h
@@ -90,16 +90,26 @@
   // dependency.
   virtual void adjustSchedDependency(SUnit *def, SUnit *use,
                                      SDep& dep) const { }
-
-  // enablePostRAScheduler - If the target can benefit from post-regalloc
-  // scheduling and the specified optimization level meets the requirement
-  // return true to enable post-register-allocation scheduling. In
-  // CriticalPathRCs return any register classes that should only be broken
-  // if on the critical path.
-  virtual bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
-                                     AntiDepBreakMode& Mode,
-                                     RegClassVector& CriticalPathRCs) const;
-
+
+  // For use with PostRAScheduling: get the anti-dependence breaking that should
+  // be performed before post-RA scheduling.
+  virtual AntiDepBreakMode getAntiDepBreakMode() const {
+    return ANTIDEP_NONE;
+  }
+
+  // For use with PostRAScheduling: in CriticalPathRCs, return any register
+  // classes that should only be considered for anti-dependence breaking if they
+  // are on the critical path.
+  virtual void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const {
+    return CriticalPathRCs.clear();
+  }
+
+  // For use with PostRAScheduling: get the minimum optimization level needed
+  // to enable post-RA scheduling.
+  virtual CodeGenOpt::Level getOptLevelToEnablePostRAScheduler() const {
+    return CodeGenOpt::Default;
+  }
+
   /// \brief True if the subtarget should run the local reassignment
   /// heuristic of the register allocator.
   /// This heuristic may be compile time intensive, \p OptLevel provides
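The three hooks above replace the single enablePostRAScheduler() query: the anti-dependence mode, the critical-path register classes, and the minimum optimization level are now asked for separately, and whether the pass runs at all comes from enablePostMachineScheduler() (by default, the new SchedModel bit). For an out-of-tree target, opting in would look roughly like the sketch below; FooSubtarget and its register class are hypothetical, but the hook signatures are the ones added in this patch:

  #include "llvm/Support/CodeGen.h"
  #include "llvm/Target/TargetSubtargetInfo.h"

  namespace llvm {
  // Hypothetical subtarget: break anti-dependences only along the critical
  // path, and require at least -O2 (CodeGenOpt::Default) before running the
  // post-RA scheduler.
  class FooSubtarget : public TargetSubtargetInfo {
  public:
    AntiDepBreakMode getAntiDepBreakMode() const override {
      return ANTIDEP_CRITICAL;
    }
    void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const override {
      CriticalPathRCs.clear();
      // A real target would push its integer register class here, e.g.
      // CriticalPathRCs.push_back(&Foo::GPR32RegClass);  (hypothetical)
    }
    CodeGenOpt::Level getOptLevelToEnablePostRAScheduler() const override {
      return CodeGenOpt::Default;
    }
  };
  } // End llvm namespace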
Index: llvm/trunk/lib/CodeGen/PostRASchedulerList.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/PostRASchedulerList.cpp
+++ llvm/trunk/lib/CodeGen/PostRASchedulerList.cpp
@@ -98,6 +98,11 @@
     }
 
     bool runOnMachineFunction(MachineFunction &Fn) override;
+
+    bool enablePostRAScheduler(
+        const TargetSubtargetInfo &ST, CodeGenOpt::Level OptLevel,
+        TargetSubtargetInfo::AntiDepBreakMode &Mode,
+        TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const;
   };
   char PostRAScheduler::ID = 0;
@@ -245,6 +250,17 @@
 }
 #endif
 
+bool PostRAScheduler::enablePostRAScheduler(
+    const TargetSubtargetInfo &ST,
+    CodeGenOpt::Level OptLevel,
+    TargetSubtargetInfo::AntiDepBreakMode &Mode,
+    TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const {
+  Mode = ST.getAntiDepBreakMode();
+  ST.getCriticalPathRCs(CriticalPathRCs);
+  return ST.enablePostMachineScheduler() &&
+         OptLevel >= ST.getOptLevelToEnablePostRAScheduler();
+}
+
 bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
   if (skipOptnoneFunction(*Fn.getFunction()))
     return false;
@@ -267,9 +283,10 @@
   } else {
     // Check that post-RA scheduling is enabled for this target.
     // This may upgrade the AntiDepMode.
-    const TargetSubtargetInfo &ST = Fn.getTarget().getSubtarget<TargetSubtargetInfo>();
-    if (!ST.enablePostRAScheduler(PassConfig->getOptLevel(), AntiDepMode,
-                                  CriticalPathRCs))
+    const TargetSubtargetInfo &ST =
+        Fn.getTarget().getSubtarget<TargetSubtargetInfo>();
+    if (!enablePostRAScheduler(ST, PassConfig->getOptLevel(),
+                               AntiDepMode, CriticalPathRCs))
       return false;
   }
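The pass now reassembles the old all-in-one query from those hooks, as the helper above shows. A small, LLVM-free model of the gating logic, with the Atom and Mips settings from this patch plugged in (the names are illustrative and the enum simply mirrors CodeGenOpt::Level):

  #include <cassert>

  enum OptLevel { O0 = 0, O1 = 1, O2 = 2, O3 = 3 }; // mirrors CodeGenOpt::Level

  struct SubtargetModel {
    bool PostRAEnabled;   // SchedModel bit, or an enablePostMachineScheduler() override
    OptLevel MinOptLevel; // getOptLevelToEnablePostRAScheduler()
  };

  // Mirrors PostRAScheduler::enablePostRAScheduler() above; the anti-dep mode
  // and critical-path RCs are omitted since they do not affect whether the
  // pass runs.
  static bool runsPostRA(const SubtargetModel &ST, OptLevel Opt) {
    return ST.PostRAEnabled && Opt >= ST.MinOptLevel;
  }

  int main() {
    SubtargetModel Atom = {true, O2}; // bit set in X86ScheduleAtom.td; default min level
    SubtargetModel Mips = {true, O3}; // always enabled, but only at Aggressive
    assert(runsPostRA(Atom, O2) && !runsPostRA(Atom, O1));
    assert(runsPostRA(Mips, O3) && !runsPostRA(Mips, O2));
    return 0;
  }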
Index: llvm/trunk/lib/Target/ARM/ARMSubtarget.h
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMSubtarget.h
+++ llvm/trunk/lib/Target/ARM/ARMSubtarget.h
@@ -105,9 +105,6 @@
   /// NoARM - True if subtarget does not support ARM mode execution.
   bool NoARM;
 
-  /// PostRAScheduler - True if using post-register-allocation scheduler.
-  bool PostRAScheduler;
-
   /// IsR9Reserved - True if R9 is a not available as general purpose register.
   bool IsR9Reserved;
 
@@ -429,12 +426,7 @@
   bool hasSinCos() const;
 
   /// True for some subtargets at > -O0.
-  bool enablePostMachineScheduler() const;
-
-  /// enablePostRAScheduler - True at 'More' optimization.
-  bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
-                             TargetSubtargetInfo::AntiDepBreakMode& Mode,
-                             RegClassVector& CriticalPathRCs) const override;
+  bool enablePostMachineScheduler() const override;
 
   // enableAtomicExpandLoadLinked - True if we need to expand our atomics.
   bool enableAtomicExpandLoadLinked() const override;
Index: llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp
+++ llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp
@@ -191,7 +191,6 @@
   InThumbMode = false;
   HasThumb2 = false;
   NoARM = false;
-  PostRAScheduler = false;
   IsR9Reserved = ReserveR9;
   UseMovt = false;
   SupportsTailCall = false;
@@ -308,9 +307,6 @@
     SupportsTailCall = !isThumb1Only();
   }
 
-  if (!isThumb() || hasThumb2())
-    PostRAScheduler = true;
-
   switch (Align) {
   case DefaultAlign:
     // Assume pre-ARMv6 doesn't support unaligned accesses.
@@ -425,25 +421,15 @@
          !getTargetTriple().isOSVersionLT(7, 0);
 }
 
-// Enable the PostMachineScheduler if the target selects it instead of
-// PostRAScheduler. Currently only available on the command line via
-// -misched-postra.
+// This overrides the PostRAScheduler bit in the SchedModel for any CPU.
 bool ARMSubtarget::enablePostMachineScheduler() const {
-  return PostRAScheduler;
+  return (!isThumb() || hasThumb2());
 }
 
 bool ARMSubtarget::enableAtomicExpandLoadLinked() const {
   return hasAnyDataBarrier() && !isThumb1Only();
 }
 
-bool ARMSubtarget::enablePostRAScheduler(
-    CodeGenOpt::Level OptLevel,
-    TargetSubtargetInfo::AntiDepBreakMode& Mode,
-    RegClassVector& CriticalPathRCs) const {
-  Mode = TargetSubtargetInfo::ANTIDEP_NONE;
-  return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
-}
-
 bool ARMSubtarget::useMovt(const MachineFunction &MF) const {
   // NOTE Windows on ARM needs to use mov.w/mov.t pairs to materialise 32-bit
   // immediates as it is inherently position independent, and may be out of
Index: llvm/trunk/lib/Target/Mips/MipsSubtarget.h
===================================================================
--- llvm/trunk/lib/Target/Mips/MipsSubtarget.h
+++ llvm/trunk/lib/Target/Mips/MipsSubtarget.h
@@ -160,9 +160,10 @@
   std::unique_ptr TLInfoSE;
 
 public:
-  bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
-                             AntiDepBreakMode& Mode,
-                             RegClassVector& CriticalPathRCs) const override;
+  /// This overrides the PostRAScheduler bit in the SchedModel for each CPU.
+  bool enablePostMachineScheduler() const override;
+  void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const override;
+  CodeGenOpt::Level getOptLevelToEnablePostRAScheduler() const override;
 
   /// Only O32 and EABI supported right now.
   bool isABI_EABI() const { return MipsABI == EABI; }
Index: llvm/trunk/lib/Target/Mips/MipsSubtarget.cpp
===================================================================
--- llvm/trunk/lib/Target/Mips/MipsSubtarget.cpp
+++ llvm/trunk/lib/Target/Mips/MipsSubtarget.cpp
@@ -177,15 +177,17 @@
     UseSmallSection = !IsLinux && (RM == Reloc::Static);
 }
 
-bool
-MipsSubtarget::enablePostRAScheduler(CodeGenOpt::Level OptLevel,
-                                     TargetSubtargetInfo::AntiDepBreakMode &Mode,
-                                     RegClassVector &CriticalPathRCs) const {
-  Mode = TargetSubtargetInfo::ANTIDEP_NONE;
+/// This overrides the PostRAScheduler bit in the SchedModel for any CPU.
+bool MipsSubtarget::enablePostMachineScheduler() const { return true; }
+
+void MipsSubtarget::getCriticalPathRCs(RegClassVector &CriticalPathRCs) const {
   CriticalPathRCs.clear();
-  CriticalPathRCs.push_back(isGP64bit() ? &Mips::GPR64RegClass
-                                        : &Mips::GPR32RegClass);
-  return OptLevel >= CodeGenOpt::Aggressive;
+  CriticalPathRCs.push_back(isGP64bit() ?
+                            &Mips::GPR64RegClass : &Mips::GPR32RegClass);
+}
+
+CodeGenOpt::Level MipsSubtarget::getOptLevelToEnablePostRAScheduler() const {
+  return CodeGenOpt::Aggressive;
 }
 
 MipsSubtarget &
Index: llvm/trunk/lib/Target/PowerPC/PPCSubtarget.h
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCSubtarget.h
+++ llvm/trunk/lib/Target/PowerPC/PPCSubtarget.h
@@ -225,15 +225,15 @@
   bool isDarwinABI() const { return isDarwin(); }
   bool isSVR4ABI() const { return !isDarwin(); }
 
-  /// enablePostRAScheduler - True at 'More' optimization.
-  bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
-                             TargetSubtargetInfo::AntiDepBreakMode& Mode,
-                             RegClassVector& CriticalPathRCs) const override;
-
   bool enableEarlyIfConversion() const override { return hasISEL(); }
 
   // Scheduling customization.
   bool enableMachineScheduler() const override;
+  // This overrides the PostRAScheduler bit in the SchedModel for each CPU.
+  bool enablePostMachineScheduler() const override;
+  AntiDepBreakMode getAntiDepBreakMode() const override;
+  void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const override;
+
   void overrideSchedPolicy(MachineSchedPolicy &Policy,
                            MachineInstr *begin,
                            MachineInstr *end,
Index: llvm/trunk/lib/Target/PowerPC/PPCSubtarget.cpp
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCSubtarget.cpp
+++ llvm/trunk/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -222,22 +222,6 @@
          GV->hasCommonLinkage() || isDecl;
 }
 
-bool PPCSubtarget::enablePostRAScheduler(
-    CodeGenOpt::Level OptLevel,
-    TargetSubtargetInfo::AntiDepBreakMode& Mode,
-    RegClassVector& CriticalPathRCs) const {
-  Mode = TargetSubtargetInfo::ANTIDEP_ALL;
-
-  CriticalPathRCs.clear();
-
-  if (isPPC64())
-    CriticalPathRCs.push_back(&PPC::G8RCRegClass);
-  else
-    CriticalPathRCs.push_back(&PPC::GPRCRegClass);
-
-  return OptLevel >= CodeGenOpt::Default;
-}
-
 // Embedded cores need aggressive scheduling (and some others also benefit).
 static bool needsAggressiveScheduling(unsigned Directive) {
   switch (Directive) {
@@ -259,6 +243,19 @@
   return needsAggressiveScheduling(DarwinDirective);
 }
 
+// This overrides the PostRAScheduler bit in the SchedModel for each CPU.
+bool PPCSubtarget::enablePostMachineScheduler() const { return true; }
+
+PPCGenSubtargetInfo::AntiDepBreakMode PPCSubtarget::getAntiDepBreakMode() const {
+  return TargetSubtargetInfo::ANTIDEP_ALL;
+}
+
+void PPCSubtarget::getCriticalPathRCs(RegClassVector &CriticalPathRCs) const {
+  CriticalPathRCs.clear();
+  CriticalPathRCs.push_back(isPPC64() ?
+                            &PPC::G8RCRegClass : &PPC::GPRCRegClass);
+}
+
 void PPCSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                        MachineInstr *begin,
                                        MachineInstr *end,
Index: llvm/trunk/lib/Target/TargetSubtargetInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/TargetSubtargetInfo.cpp
+++ llvm/trunk/lib/Target/TargetSubtargetInfo.cpp
@@ -53,16 +53,7 @@
 }
 
 bool TargetSubtargetInfo::enablePostMachineScheduler() const {
-  return false;
-}
-
-bool TargetSubtargetInfo::enablePostRAScheduler(
-    CodeGenOpt::Level OptLevel,
-    AntiDepBreakMode& Mode,
-    RegClassVector& CriticalPathRCs) const {
-  Mode = ANTIDEP_NONE;
-  CriticalPathRCs.clear();
-  return false;
+  return getSchedModel()->PostRAScheduler;
 }
 
 bool TargetSubtargetInfo::useAA() const {
Index: llvm/trunk/lib/Target/X86/X86Schedule.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86Schedule.td
+++ llvm/trunk/lib/Target/X86/X86Schedule.td
@@ -633,6 +633,7 @@
   let MicroOpBufferSize = 32;
   let LoadLatency = 4;
   let HighLatency = 10;
+  let PostRAScheduler = 0;
 }
 
 include "X86ScheduleAtom.td"
Index: llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
+++ llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
@@ -538,6 +538,7 @@
   // On the Atom, the throughput for taken branches is 2 cycles. For small
   // simple loops, expand by a small factor to hide the backedge cost.
   let LoopMicroOpBufferSize = 10;
+  let PostRAScheduler = 1;
 
   let Itineraries = AtomItineraries;
 }
Index: llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
+++ llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
@@ -19,6 +19,7 @@
   let MicroOpBufferSize = 32; // Based on the reorder buffer.
   let LoadLatency = 3;
   let MispredictPenalty = 10;
+  let PostRAScheduler = 1;
 
   // For small loops, expand by a small factor to hide the backedge cost.
   let LoopMicroOpBufferSize = 10;
Index: llvm/trunk/lib/Target/X86/X86Subtarget.h
===================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.h
+++ llvm/trunk/lib/Target/X86/X86Subtarget.h
@@ -170,9 +170,6 @@
   /// full divides and should be used when possible.
   bool HasSlowDivide;
 
-  /// PostRAScheduler - True if using post-register-allocation scheduler.
-  bool PostRAScheduler;
-
   /// PadShortFunctions - True if the short functions should be padded to prevent
   /// a stall when returning too early.
   bool PadShortFunctions;
@@ -453,18 +450,15 @@
   /// Enable the MachineScheduler pass for all X86 subtargets.
   bool enableMachineScheduler() const override { return true; }
 
-  /// enablePostRAScheduler - run for Atom optimization.
-  bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
-                             TargetSubtargetInfo::AntiDepBreakMode& Mode,
-                             RegClassVector& CriticalPathRCs) const override;
-
-  bool postRAScheduler() const { return PostRAScheduler; }
-
   bool enableEarlyIfConversion() const override;
 
   /// getInstrItins = Return the instruction itineraries based on the
   /// subtarget selection.
   const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+
+  AntiDepBreakMode getAntiDepBreakMode() const override {
+    return TargetSubtargetInfo::ANTIDEP_CRITICAL;
+  }
 };
 
 } // End llvm namespace
Index: llvm/trunk/lib/Target/X86/X86Subtarget.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.cpp
+++ llvm/trunk/lib/Target/X86/X86Subtarget.cpp
@@ -219,9 +219,6 @@
   // Make sure the right MCSchedModel is used.
   InitCPUSchedModel(CPUName);
 
-  if (X86ProcFamily == IntelAtom || X86ProcFamily == IntelSLM)
-    PostRAScheduler = true;
-
   InstrItins = getInstrItineraryForCPU(CPUName);
 
   // It's important to keep the MCSubtargetInfo feature bits in sync with
@@ -286,7 +283,6 @@
   HasCmpxchg16b = false;
   UseLeaForSP = false;
   HasSlowDivide = false;
-  PostRAScheduler = false;
   PadShortFunctions = false;
   CallRegIndirect = false;
   LEAUsesAG = false;
@@ -359,16 +355,7 @@
                           is64Bit() ? -8 : -4),
       JITInfo(hasSSE1()) {}
 
-bool
-X86Subtarget::enablePostRAScheduler(CodeGenOpt::Level OptLevel,
-                                    TargetSubtargetInfo::AntiDepBreakMode &Mode,
-                                    RegClassVector &CriticalPathRCs) const {
-  Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
-  CriticalPathRCs.clear();
-  return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
-}
-
-bool
-X86Subtarget::enableEarlyIfConversion() const {
+bool X86Subtarget::enableEarlyIfConversion() const {
   return hasCMov() && X86EarlyIfConv;
 }
+
Index: llvm/trunk/utils/TableGen/SubtargetEmitter.cpp
===================================================================
--- llvm/trunk/utils/TableGen/SubtargetEmitter.cpp
+++ llvm/trunk/utils/TableGen/SubtargetEmitter.cpp
@@ -1201,6 +1201,10 @@
   EmitProcessorProp(OS, PI->ModelDef, "MispredictPenalty", ',');
 
   OS << "  " << (bool)(PI->ModelDef ?
+                       PI->ModelDef->getValueAsBit("PostRAScheduler") : 0)
+     << ", // " << "PostRAScheduler\n";
+
+  OS << "  " << (bool)(PI->ModelDef ?
                        PI->ModelDef->getValueAsBit("CompleteModel") : 0)
      << ", // " << "CompleteModel\n";
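With the emitter change, each scheduling model in the generated <Target>GenSubtargetInfo.inc gains the new boolean between MispredictPenalty and CompleteModel, matching the position of the new MCSchedModel constructor parameter. An illustrative (not verbatim) initializer for an Atom-like CPU, with placeholder values and a hypothetical model name:

  // Sketch of emitter output; real output points at the CPU's generated
  // resource, sched-class, and itinerary tables instead of the placeholders.
  static const llvm::MCSchedModel AtomModel(
      2,       // IssueWidth
      32,      // MicroOpBufferSize
      10,      // LoopMicroOpBufferSize
      4,       // LoadLatency
      10,      // HighLatency
      10,      // MispredictPenalty
      1,       // PostRAScheduler (new; emitted from the .td bit)
      0,       // CompleteModel
      2,       // Processor ID
      nullptr, nullptr, 0, 0, // ProcResource/SchedClass tables (placeholders)
      nullptr); // InstrItineraries (real Atom output names its itinerary table)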