Index: llvm/trunk/include/llvm/CodeGen/ExecutionDepsFix.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/ExecutionDepsFix.h +++ llvm/trunk/include/llvm/CodeGen/ExecutionDepsFix.h @@ -131,24 +131,13 @@ int Def; }; -class ExecutionDepsFix : public MachineFunctionPass { - SpecificBumpPtrAllocator Allocator; - SmallVector Avail; - - const TargetRegisterClass *const RC; - MachineFunction *MF; - const TargetInstrInfo *TII; - const TargetRegisterInfo *TRI; - RegisterClassInfo RegClassInfo; - std::vector> AliasMap; - const unsigned NumRegs; - LiveReg *LiveRegs; +/// This class provides the basic blocks traversal order used by passes like +/// ReachingDefAnalysis and ExecutionDomainFix. +/// It identifies basic blocks that are part of loops and should be visited twice +/// and returns an efficient traversal order for all the blocks. +class LoopTraversal { +private: struct MBBInfo { - // Keeps clearance and domain information for all registers. Note that this - // is different from the usual definition notion of liveness. The CPU - // doesn't care whether or not we consider a register killed. - LiveReg *OutRegs = nullptr; - // Whether we have gotten to this block in primary processing yet. bool PrimaryCompleted = false; @@ -166,22 +155,118 @@ using MBBInfoMap = DenseMap; MBBInfoMap MBBInfos; - /// List of undefined register reads in this block in forward order. - std::vector> UndefReads; +public: + struct TraversedMBBInfo { + MachineBasicBlock *MBB = nullptr; + bool PrimaryPass = true; + bool IsDone = true; + + TraversedMBBInfo(MachineBasicBlock *BB = nullptr, bool Primary = true, + bool Done = true) + : MBB(BB), PrimaryPass(Primary), IsDone(Done) {} + }; + LoopTraversal() {} - /// Storage for register unit liveness. - LivePhysRegs LiveRegSet; + SmallVector traverse(MachineFunction &MF); + +private: + bool isBlockDone(MachineBasicBlock *MBB); + +}; + +/// This class provides the reaching def analysis. 
+class ReachingDefAnalysis : public MachineFunctionPass { +private: + MachineFunction *MF; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + RegisterClassInfo RegClassInfo; + unsigned NumRegUnits; + LiveReg *LiveRegs; + + // Keeps clearance information for all registers. Note that this + // is different from the usual definition notion of liveness. The CPU + // doesn't care whether or not we consider a register killed. + using OutRegsInfoMap = DenseMap; + OutRegsInfoMap MBBOutRegsInfos; /// Current instruction number. /// The first instruction in each basic block is 0. int CurInstr; + /// Maps instructions to their instruction Ids, relative to the beginning of + /// their basic blocks. + DenseMap InstIds; + + /// All reaching defs of a given RegUnit for a given MBB. + using MBBRegUnitDefs = SmallVector; + /// All reaching defs of all reg units for a given MBB + using MBBDefsInfo = std::vector; + /// All reaching defs of all reg units for all MBBs + using MBBReachingDefsInfo = SmallVector; + MBBReachingDefsInfo MBBReachingDefs; + public: - ExecutionDepsFix(char &PassID, const TargetRegisterClass &RC) + static char ID; // Pass identification, replacement for typeid + + ReachingDefAnalysis() : MachineFunctionPass(ID) { + initializeReachingDefAnalysisPass(*PassRegistry::getPassRegistry()); + } + void releaseMemory() override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + + /// Provides the instruction id of the closest reaching def instruction of + /// PhysReg that reaches MI, relative to the beginning of MI's basic block. 
+ int getReachingDef(MachineInstr *MI, int PhysReg); + /// Provides the clearance - the number of instructions since the closest + /// reaching def instruction of PhysReg that reaches MI. + int getClearance(MachineInstr *MI, MCPhysReg PhysReg); + +private: + void enterBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB); + void leaveBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB); + void processBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB); + void processDefs(MachineInstr *); +}; + +class ExecutionDomainFix : public MachineFunctionPass { + SpecificBumpPtrAllocator Allocator; + SmallVector Avail; + + const TargetRegisterClass *const RC; + MachineFunction *MF; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + RegisterClassInfo RegClassInfo; + std::vector> AliasMap; + const unsigned NumRegs; + LiveReg *LiveRegs; + // Keeps domain information for all registers. Note that this + // is different from the usual definition notion of liveness. The CPU + // doesn't care whether or not we consider a register killed. 
+ using OutRegsInfoMap = DenseMap; + OutRegsInfoMap MBBOutRegsInfos; + + ReachingDefAnalysis *RDA; + +public: + ExecutionDomainFix(char &PassID, const TargetRegisterClass &RC) : MachineFunctionPass(PassID), RC(&RC), NumRegs(RC.getNumRegs()) {} void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); + AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -211,14 +296,53 @@ void collapse(DomainValue *dv, unsigned domain); bool merge(DomainValue *A, DomainValue *B); - void enterBasicBlock(MachineBasicBlock*); - void leaveBasicBlock(MachineBasicBlock*); - bool isBlockDone(MachineBasicBlock *); - void processBasicBlock(MachineBasicBlock *MBB, bool PrimaryPass); + void enterBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB); + void leaveBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB); + void processBasicBlock(const LoopTraversal::TraversedMBBInfo &TraversedMBB); bool visitInstr(MachineInstr *); - void processDefs(MachineInstr *, bool breakDependency, bool Kill); + void processDefs(MachineInstr *, bool Kill); void visitSoftInstr(MachineInstr*, unsigned mask); void visitHardInstr(MachineInstr*, unsigned domain); +}; + +class BreakFalseDeps : public MachineFunctionPass { +private: + MachineFunction *MF; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + RegisterClassInfo RegClassInfo; + + /// List of undefined register reads in this block in forward order. + std::vector> UndefReads; + + /// Storage for register unit liveness. 
+ LivePhysRegs LiveRegSet; + + ReachingDefAnalysis *RDA; + +public: + static char ID; // Pass identification, replacement for typeid + + BreakFalseDeps() : MachineFunctionPass(ID) { + initializeBreakFalseDepsPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + +private: + void processBasicBlock(MachineBasicBlock *MBB); + void processDefs(MachineInstr *MI); bool pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, unsigned Pref); bool shouldBreakDependence(MachineInstr*, unsigned OpIdx, unsigned Pref); Index: llvm/trunk/include/llvm/InitializePasses.h =================================================================== --- llvm/trunk/include/llvm/InitializePasses.h +++ llvm/trunk/include/llvm/InitializePasses.h @@ -80,6 +80,7 @@ void initializeBranchProbabilityInfoWrapperPassPass(PassRegistry&); void initializeBranchRelaxationPass(PassRegistry&); void initializeBreakCriticalEdgesPass(PassRegistry&); +void initializeBreakFalseDepsPass(PassRegistry&); void initializeCallSiteSplittingLegacyPassPass(PassRegistry&); void initializeCFGOnlyPrinterLegacyPassPass(PassRegistry&); void initializeCFGOnlyViewerLegacyPassPass(PassRegistry&); @@ -313,6 +314,7 @@ void initializeRAGreedyPass(PassRegistry&); void initializeReassociateLegacyPassPass(PassRegistry&); void initializeRegBankSelectPass(PassRegistry&); +void initializeReachingDefAnalysisPass(PassRegistry&); void initializeRegToMemPass(PassRegistry&); void initializeRegionInfoPassPass(PassRegistry&); void initializeRegionOnlyPrinterPass(PassRegistry&); Index: llvm/trunk/lib/CodeGen/ExecutionDepsFix.cpp 
=================================================================== --- llvm/trunk/lib/CodeGen/ExecutionDepsFix.cpp +++ llvm/trunk/lib/CodeGen/ExecutionDepsFix.cpp @@ -25,16 +25,27 @@ #define DEBUG_TYPE "execution-deps-fix" +char ReachingDefAnalysis::ID = 0; +INITIALIZE_PASS(ReachingDefAnalysis, "reaching-deps-analysis", + "ReachingDefAnalysis", false, true) + +char BreakFalseDeps::ID = 0; +INITIALIZE_PASS_BEGIN(BreakFalseDeps, "break-false-deps", "BreakFalseDeps", + false, false) +INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis) +INITIALIZE_PASS_END(BreakFalseDeps, "break-false-deps", "BreakFalseDeps", false, + false) + /// Translate TRI register number to a list of indices into our smaller tables /// of interesting registers. iterator_range::const_iterator> -ExecutionDepsFix::regIndices(unsigned Reg) const { +ExecutionDomainFix::regIndices(unsigned Reg) const { assert(Reg < AliasMap.size() && "Invalid register"); const auto &Entry = AliasMap[Reg]; return make_range(Entry.begin(), Entry.end()); } -DomainValue *ExecutionDepsFix::alloc(int domain) { +DomainValue *ExecutionDomainFix::alloc(int domain) { DomainValue *dv = Avail.empty() ? new(Allocator.Allocate()) DomainValue : Avail.pop_back_val(); @@ -47,7 +58,7 @@ /// Release a reference to DV. When the last reference is released, /// collapse if needed. -void ExecutionDepsFix::release(DomainValue *DV) { +void ExecutionDomainFix::release(DomainValue *DV) { while (DV) { assert(DV->Refs && "Bad DomainValue"); if (--DV->Refs) @@ -67,7 +78,7 @@ /// Follow the chain of dead DomainValues until a live DomainValue is reached. /// Update the referenced pointer when necessary. -DomainValue *ExecutionDepsFix::resolve(DomainValue *&DVRef) { +DomainValue *ExecutionDomainFix::resolve(DomainValue *&DVRef) { DomainValue *DV = DVRef; if (!DV || !DV->Next) return DV; @@ -84,7 +95,7 @@ } /// Set LiveRegs[rx] = dv, updating reference counts. 
-void ExecutionDepsFix::setLiveReg(int rx, DomainValue *dv) { +void ExecutionDomainFix::setLiveReg(int rx, DomainValue *dv) { assert(unsigned(rx) < NumRegs && "Invalid index"); assert(LiveRegs && "Must enter basic block first."); @@ -96,7 +107,7 @@ } // Kill register rx, recycle or collapse any DomainValue. -void ExecutionDepsFix::kill(int rx) { +void ExecutionDomainFix::kill(int rx) { assert(unsigned(rx) < NumRegs && "Invalid index"); assert(LiveRegs && "Must enter basic block first."); if (!LiveRegs[rx].Value) @@ -107,7 +118,7 @@ } /// Force register rx into domain. -void ExecutionDepsFix::force(int rx, unsigned domain) { +void ExecutionDomainFix::force(int rx, unsigned domain) { assert(unsigned(rx) < NumRegs && "Invalid index"); assert(LiveRegs && "Must enter basic block first."); if (DomainValue *dv = LiveRegs[rx].Value) { @@ -130,7 +141,7 @@ /// Collapse open DomainValue into given domain. If there are multiple /// registers using dv, they each get a unique collapsed DomainValue. -void ExecutionDepsFix::collapse(DomainValue *dv, unsigned domain) { +void ExecutionDomainFix::collapse(DomainValue *dv, unsigned domain) { assert(dv->hasDomain(domain) && "Cannot collapse"); // Collapse all the instructions. @@ -146,7 +157,7 @@ } /// All instructions and registers in B are moved to A, and B is released. -bool ExecutionDepsFix::merge(DomainValue *A, DomainValue *B) { +bool ExecutionDomainFix::merge(DomainValue *A, DomainValue *B) { assert(!A->isCollapsed() && "Cannot merge into collapsed"); assert(!B->isCollapsed() && "Cannot merge from collapsed"); if (A == B) @@ -172,32 +183,34 @@ } /// Set up LiveRegs by merging predecessor live-out values. 
-void ExecutionDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { +void ReachingDefAnalysis::enterBasicBlock( + const LoopTraversal::TraversedMBBInfo &TraversedMBB) { + + MachineBasicBlock *MBB = TraversedMBB.MBB; + int MBBNumber = MBB->getNumber(); + MBBReachingDefs[MBBNumber].resize(NumRegUnits); + // Reset instruction counter in each basic block. CurInstr = 0; - // Set up UndefReads to track undefined register reads. - UndefReads.clear(); - LiveRegSet.clear(); - // Set up LiveRegs to represent registers entering MBB. if (!LiveRegs) - LiveRegs = new LiveReg[NumRegs]; + LiveRegs = new LiveReg[NumRegUnits]; // Default values are 'nothing happened a long time ago'. - for (unsigned rx = 0; rx != NumRegs; ++rx) { - LiveRegs[rx].Value = nullptr; + for (unsigned rx = 0; rx != NumRegUnits; ++rx) { LiveRegs[rx].Def = -(1 << 20); } // This is the entry block. if (MBB->pred_empty()) { for (const auto &LI : MBB->liveins()) { - for (int rx : regIndices(LI.PhysReg)) { + for (MCRegUnitIterator rx(LI.PhysReg, TRI); rx.isValid(); ++rx) { // Treat function live-ins as if they were defined just before the first // instruction. Usually, function arguments are set up immediately // before the call. - LiveRegs[rx].Def = -1; + LiveRegs[*rx].Def = -1; + MBBReachingDefs[MBBNumber][*rx].push_back(LiveRegs[*rx].Def); } } DEBUG(dbgs() << printMBBReference(*MBB) << ": entry\n"); @@ -207,20 +220,64 @@ // Try to coalesce live-out registers from predecessors. 
for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(), pe = MBB->pred_end(); pi != pe; ++pi) { - auto fi = MBBInfos.find(*pi); - assert(fi != MBBInfos.end() && + auto fi = MBBOutRegsInfos.find(*pi); + assert(fi != MBBOutRegsInfos.end() && "Should have pre-allocated MBBInfos for all MBBs"); - LiveReg *Incoming = fi->second.OutRegs; + LiveReg *Incoming = fi->second; // Incoming is null if this is a backedge from a BB // we haven't processed yet if (Incoming == nullptr) { continue; } - for (unsigned rx = 0; rx != NumRegs; ++rx) { + for (unsigned rx = 0; rx != NumRegUnits; ++rx) { // Use the most recent predecessor def for each register. LiveRegs[rx].Def = std::max(LiveRegs[rx].Def, Incoming[rx].Def); + if ((LiveRegs[rx].Def != -(1 << 20))) + MBBReachingDefs[MBBNumber][rx].push_back(LiveRegs[rx].Def); + } + } + + DEBUG( + dbgs() << printMBBReference(*MBB) + << (!TraversedMBB.IsDone ? ": incomplete\n" : ": all preds known\n")); +} +/// Set up LiveRegs by merging predecessor live-out values. +void ExecutionDomainFix::enterBasicBlock( + const LoopTraversal::TraversedMBBInfo &TraversedMBB) { + + MachineBasicBlock *MBB = TraversedMBB.MBB; + + // Set up LiveRegs to represent registers entering MBB. + if (!LiveRegs) + LiveRegs = new LiveReg[NumRegs]; + + // Default values are 'nothing happened a long time ago'. + for (unsigned rx = 0; rx != NumRegs; ++rx) { + LiveRegs[rx].Value = nullptr; + } + + // This is the entry block. + if (MBB->pred_empty()) { + DEBUG(dbgs() << printMBBReference(*MBB) << ": entry\n"); + return; + } + + // Try to coalesce live-out registers from predecessors. 
+ for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(), + pe = MBB->pred_end(); pi != pe; ++pi) { + auto fi = MBBOutRegsInfos.find(*pi); + assert(fi != MBBOutRegsInfos.end() && + "Should have pre-allocated MBBInfos for all MBBs"); + LiveReg *Incoming = fi->second; + // Incoming is null if this is a backedge from a BB + // we haven't processed yet + if (Incoming == nullptr) { + continue; + } + + for (unsigned rx = 0; rx != NumRegs; ++rx) { DomainValue *pdv = resolve(Incoming[rx].Value); if (!pdv) continue; @@ -247,21 +304,30 @@ } DEBUG( dbgs() << printMBBReference(*MBB) - << (!isBlockDone(MBB) ? ": incomplete\n" : ": all preds known\n")); + << (!TraversedMBB.IsDone ? ": incomplete\n" : ": all preds known\n")); } -void ExecutionDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) { +void ReachingDefAnalysis::leaveBasicBlock( + const LoopTraversal::TraversedMBBInfo &TraversedMBB) { assert(LiveRegs && "Must enter basic block first."); - LiveReg *OldOutRegs = MBBInfos[MBB].OutRegs; // Save register clearances at end of MBB - used by enterBasicBlock(). - MBBInfos[MBB].OutRegs = LiveRegs; + MBBOutRegsInfos[TraversedMBB.MBB] = LiveRegs; // While processing the basic block, we kept `Def` relative to the start // of the basic block for convenience. However, future use of this information // only cares about the clearance from the end of the block, so adjust // everything to be relative to the end of the basic block. - for (unsigned i = 0, e = NumRegs; i != e; ++i) + for (unsigned i = 0, e = NumRegUnits; i != e; ++i) LiveRegs[i].Def -= CurInstr; + LiveRegs = nullptr; +} + +void ExecutionDomainFix::leaveBasicBlock( + const LoopTraversal::TraversedMBBInfo &TraversedMBB) { + assert(LiveRegs && "Must enter basic block first."); + LiveReg *OldOutRegs = MBBOutRegsInfos[TraversedMBB.MBB]; + // Save register clearances at end of MBB - used by enterBasicBlock(). + MBBOutRegsInfos[TraversedMBB.MBB] = LiveRegs; if (OldOutRegs) { // This must be the second pass. 
// Release all the DomainValues instead of keeping them. @@ -272,7 +338,7 @@ LiveRegs = nullptr; } -bool ExecutionDepsFix::visitInstr(MachineInstr *MI) { +bool ExecutionDomainFix::visitInstr(MachineInstr *MI) { // Update instructions with explicit execution domains. std::pair DomP = TII->getExecutionDomain(*MI); if (DomP.first) { @@ -290,16 +356,22 @@ /// is truly dependent on, or use a register with clearance higher than Pref. /// Returns true if it was able to find a true dependency, thus not requiring /// a dependency breaking instruction regardless of clearance. -bool ExecutionDepsFix::pickBestRegisterForUndef(MachineInstr *MI, +bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, unsigned Pref) { MachineOperand &MO = MI->getOperand(OpIdx); assert(MO.isUndef() && "Expected undef machine operand"); unsigned OriginalReg = MO.getReg(); - // Update only undef operands that are mapped to one register. - if (AliasMap[OriginalReg].size() != 1) - return false; + // Update only undef operands that have reg units that are mapped to one root. + for (MCRegUnitIterator Unit(OriginalReg, TRI); Unit.isValid(); ++Unit) { + unsigned NumRoots = 0; + for (MCRegUnitRootIterator Root(*Unit, TRI); Root.isValid(); ++Root) { + NumRoots++; + if (NumRoots > 1) + return false; + } + } // Get the undef operand's register class const TargetRegisterClass *OpRC = @@ -323,10 +395,7 @@ unsigned MaxClearanceReg = OriginalReg; ArrayRef Order = RegClassInfo.getOrder(OpRC); for (auto Reg : Order) { - assert(AliasMap[Reg].size() == 1 && - "Reg is expected to be mapped to a single index"); - int RCrx = *regIndices(Reg).begin(); - unsigned Clearance = CurInstr - LiveRegs[RCrx].Def; + unsigned Clearance = RDA->getClearance(MI, Reg); if (Clearance <= MaxClearance) continue; MaxClearance = Clearance; @@ -345,44 +414,26 @@ /// \brief Return true to if it makes sense to break dependence on a partial def /// or undef use. 
-bool ExecutionDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx, - unsigned Pref) { +bool BreakFalseDeps::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx, + unsigned Pref) { unsigned reg = MI->getOperand(OpIdx).getReg(); - for (int rx : regIndices(reg)) { - unsigned Clearance = CurInstr - LiveRegs[rx].Def; - DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); + unsigned Clearance = RDA->getClearance(MI, reg); + DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); - if (Pref > Clearance) { - DEBUG(dbgs() << ": Break dependency.\n"); - continue; - } - DEBUG(dbgs() << ": OK .\n"); - return false; + if (Pref > Clearance) { + DEBUG(dbgs() << ": Break dependency.\n"); + return true; } - return true; + DEBUG(dbgs() << ": OK .\n"); + return false; } // Update def-ages for registers defined by MI. // If Kill is set, also kill off DomainValues clobbered by the defs. // // Also break dependencies on partial defs and undef uses. -void ExecutionDepsFix::processDefs(MachineInstr *MI, bool breakDependency, - bool Kill) { +void ExecutionDomainFix::processDefs(MachineInstr *MI, bool Kill) { assert(!MI->isDebugValue() && "Won't process debug values"); - - // Break dependence on undef uses. Do this before updating LiveRegs below. - unsigned OpNum; - if (breakDependency) { - unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI); - if (Pref) { - bool HadTrueDependency = pickBestRegisterForUndef(MI, OpNum, Pref); - // We don't need to bother trying to break a dependency if this - // instruction has a true dependency on that register through another - // operand - we'll have to wait for it to be available regardless. - if (!HadTrueDependency && shouldBreakDependence(MI, OpNum, Pref)) - UndefReads.push_back(std::make_pair(MI, OpNum)); - } - } const MCInstrDesc &MCID = MI->getDesc(); for (unsigned i = 0, e = MI->isVariadic() ? 
MI->getNumOperands() : MCID.getNumDefs(); @@ -394,35 +445,85 @@ continue; for (int rx : regIndices(MO.getReg())) { // This instruction explicitly defines rx. - DEBUG(dbgs() << printReg(RC->getRegister(rx), TRI) << ":\t" << CurInstr - << '\t' << *MI); - - if (breakDependency) { - // Check clearance before partial register updates. - // Call breakDependence before setting LiveRegs[rx].Def. - unsigned Pref = TII->getPartialRegUpdateClearance(*MI, i, TRI); - if (Pref && shouldBreakDependence(MI, i, Pref)) - TII->breakPartialRegDependency(*MI, i, TRI); - } - - // How many instructions since rx was last written? - LiveRegs[rx].Def = CurInstr; + DEBUG(dbgs() << printReg(RC->getRegister(rx), TRI) << ":\t" << *MI); // Kill off domains redefined by generic instructions. if (Kill) kill(rx); } } +} + +// Update def-ages for registers defined by MI. +// Also record each def in the per-block reaching-def lists of its reg units. +void ReachingDefAnalysis::processDefs(MachineInstr *MI) { + assert(!MI->isDebugValue() && "Won't process debug values"); + + int MBBNumber = MI->getParent()->getNumber(); + const MCInstrDesc &MCID = MI->getDesc(); + for (unsigned i = 0, + e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs(); + i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.getReg()) + continue; + if (MO.isUse()) + continue; + for (MCRegUnitIterator rx(MO.getReg(), TRI); rx.isValid(); ++rx) { + // This instruction explicitly defines rx. + DEBUG(dbgs() << printReg(MO.getReg(), TRI) << ":\t" << CurInstr << '\t' + << *MI); + + // How many instructions since this reg unit was last written? + LiveRegs[*rx].Def = CurInstr; + MBBReachingDefs[MBBNumber][*rx].push_back(CurInstr); + } + } + InstIds[MI] = CurInstr; ++CurInstr; } +// Break false dependencies on undef uses and partial defs of MI, +// using clearance information queried from ReachingDefAnalysis. 
+void BreakFalseDeps::processDefs(MachineInstr *MI) { + assert(!MI->isDebugValue() && "Won't process debug values"); + + // Break dependence on undef uses. Do this before updating LiveRegs below. + unsigned OpNum; + unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI); + if (Pref) { + bool HadTrueDependency = pickBestRegisterForUndef(MI, OpNum, Pref); + // We don't need to bother trying to break a dependency if this + // instruction has a true dependency on that register through another + // operand - we'll have to wait for it to be available regardless. + if (!HadTrueDependency && shouldBreakDependence(MI, OpNum, Pref)) + UndefReads.push_back(std::make_pair(MI, OpNum)); + } + + const MCInstrDesc &MCID = MI->getDesc(); + for (unsigned i = 0, + e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs(); + i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.getReg()) + continue; + if (MO.isUse()) + continue; + // Check clearance before partial register updates. + // Call breakDependence before setting LiveRegs[rx].Def. + unsigned Pref = TII->getPartialRegUpdateClearance(*MI, i, TRI); + if (Pref && shouldBreakDependence(MI, i, Pref)) + TII->breakPartialRegDependency(*MI, i, TRI); + } +} + /// \break Break false dependencies on undefined register reads. /// /// Walk the block backward computing precise liveness. This is expensive, so we /// only do it on demand. Note that the occurrence of undefined register reads /// that should be broken is very rare, but when they occur we may have many in /// a single block. -void ExecutionDepsFix::processUndefReads(MachineBasicBlock *MBB) { +void BreakFalseDeps::processUndefReads(MachineBasicBlock *MBB) { if (UndefReads.empty()) return; @@ -455,7 +556,7 @@ // A hard instruction only works in one domain. All input registers will be // forced into that domain. 
-void ExecutionDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) { +void ExecutionDomainFix::visitHardInstr(MachineInstr *mi, unsigned domain) { // Collapse all uses. for (unsigned i = mi->getDesc().getNumDefs(), e = mi->getDesc().getNumOperands(); i != e; ++i) { @@ -478,7 +579,7 @@ } // A soft instruction can be changed to work in other domains given by mask. -void ExecutionDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { +void ExecutionDomainFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { // Bitmask of available domains for this instruction after taking collapsed // operands into account. unsigned available = mask; @@ -525,7 +626,8 @@ SmallVector Regs; for (int rx : used) { assert(LiveRegs && "no space allocated for live registers"); - const LiveReg &LR = LiveRegs[rx]; + LiveReg &LR = LiveRegs[rx]; + LR.Def = RDA->getReachingDef(mi, RC->getRegister(rx)); // This useless DomainValue could have been missed above. if (!LR.Value->getCommonDomains(available)) { kill(rx); @@ -589,71 +691,58 @@ } } -void ExecutionDepsFix::processBasicBlock(MachineBasicBlock *MBB, - bool PrimaryPass) { - enterBasicBlock(MBB); +void ExecutionDomainFix::processBasicBlock( + const LoopTraversal::TraversedMBBInfo &TraversedMBB) { + enterBasicBlock(TraversedMBB); // If this block is not done, it makes little sense to make any decisions // based on clearance information. We need to make a second pass anyway, // and by then we'll have better information, so we can avoid doing the work // to try and break dependencies now. 
- bool breakDependency = isBlockDone(MBB); - for (MachineInstr &MI : *MBB) { + for (MachineInstr &MI : *TraversedMBB.MBB) { if (!MI.isDebugValue()) { bool Kill = false; - if (PrimaryPass) + if (TraversedMBB.PrimaryPass) Kill = visitInstr(&MI); - processDefs(&MI, breakDependency, Kill); + processDefs(&MI, Kill); } } - if (breakDependency) - processUndefReads(MBB); - leaveBasicBlock(MBB); + leaveBasicBlock(TraversedMBB); } -bool ExecutionDepsFix::isBlockDone(MachineBasicBlock *MBB) { - return MBBInfos[MBB].PrimaryCompleted && - MBBInfos[MBB].IncomingCompleted == MBBInfos[MBB].PrimaryIncoming && - MBBInfos[MBB].IncomingProcessed == MBB->pred_size(); +void ReachingDefAnalysis::processBasicBlock( + const LoopTraversal::TraversedMBBInfo &TraversedMBB) { + enterBasicBlock(TraversedMBB); + for (MachineInstr &MI : *TraversedMBB.MBB) { + if (!MI.isDebugValue()) + processDefs(&MI); + } + leaveBasicBlock(TraversedMBB); } -bool ExecutionDepsFix::runOnMachineFunction(MachineFunction &mf) { - if (skipFunction(mf.getFunction())) - return false; - MF = &mf; - TII = MF->getSubtarget().getInstrInfo(); - TRI = MF->getSubtarget().getRegisterInfo(); - RegClassInfo.runOnMachineFunction(mf); - LiveRegs = nullptr; - assert(NumRegs == RC->getNumRegs() && "Bad regclass"); - - DEBUG(dbgs() << "********** FIX EXECUTION DEPENDENCIES: " - << TRI->getRegClassName(RC) << " **********\n"); - - // If no relevant registers are used in the function, we can skip it - // completely. - bool anyregs = false; - const MachineRegisterInfo &MRI = mf.getRegInfo(); - for (unsigned Reg : *RC) { - if (MRI.isPhysRegUsed(Reg)) { - anyregs = true; - break; +void BreakFalseDeps::processBasicBlock(MachineBasicBlock* MBB) { + UndefReads.clear(); + // If this block is not done, it makes little sense to make any decisions + // based on clearance information. We need to make a second pass anyway, + // and by then we'll have better information, so we can avoid doing the work + // to try and break dependencies now. 
+ for (MachineInstr &MI : *MBB) { + if (!MI.isDebugValue()) { + processDefs(&MI); } } - if (!anyregs) return false; + processUndefReads(MBB); +} - // Initialize the AliasMap on the first use. - if (AliasMap.empty()) { - // Given a PhysReg, AliasMap[PhysReg] returns a list of indices into RC and - // therefore the LiveRegs array. - AliasMap.resize(TRI->getNumRegs()); - for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i) - for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true); - AI.isValid(); ++AI) - AliasMap[*AI].push_back(i); - } +bool LoopTraversal::isBlockDone(MachineBasicBlock *MBB) { + return MBBInfos[MBB].PrimaryCompleted && + MBBInfos[MBB].IncomingCompleted == MBBInfos[MBB].PrimaryIncoming && + MBBInfos[MBB].IncomingProcessed == MBB->pred_size(); +} +SmallVector +LoopTraversal::traverse(MachineFunction &MF) { // Initialize the MMBInfos - for (auto &MBB : mf) { + for (auto &MBB : MF) { MBBInfo InitialInfo; MBBInfos.insert(std::make_pair(&MBB, InitialInfo)); } @@ -686,9 +775,10 @@ * any successors that are now done. */ - MachineBasicBlock *Entry = &*MF->begin(); + MachineBasicBlock *Entry = &*MF.begin(); ReversePostOrderTraversal RPOT(Entry); SmallVector Workqueue; + SmallVector MBBTraversalOrder; for (ReversePostOrderTraversal::rpo_iterator MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { MachineBasicBlock *MBB = *MBBI; @@ -701,8 +791,8 @@ while (!Workqueue.empty()) { MachineBasicBlock *ActiveMBB = &*Workqueue.back(); Workqueue.pop_back(); - processBasicBlock(ActiveMBB, Primary); bool Done = isBlockDone(ActiveMBB); + MBBTraversalOrder.push_back(TraversedMBBInfo(ActiveMBB, Primary, Done)); for (auto *Succ : ActiveMBB->successors()) { if (!isBlockDone(Succ)) { if (Primary) { @@ -729,27 +819,166 @@ MBBI != MBBE; ++MBBI) { MachineBasicBlock *MBB = *MBBI; if (!isBlockDone(MBB)) { - processBasicBlock(MBB, false); + MBBTraversalOrder.push_back(TraversedMBBInfo(MBB, false, true)); // Don't update successors here. 
We'll get to them anyway through this // loop. } } - // Clear the LiveOuts vectors and collapse any remaining DomainValues. - for (ReversePostOrderTraversal::rpo_iterator - MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { - auto FI = MBBInfos.find(*MBBI); - if (FI == MBBInfos.end() || !FI->second.OutRegs) + MBBInfos.clear(); + + return MBBTraversalOrder; +} + +bool ExecutionDomainFix::runOnMachineFunction(MachineFunction &mf) { + if (skipFunction(mf.getFunction())) + return false; + MF = &mf; + TII = MF->getSubtarget().getInstrInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); + LiveRegs = nullptr; + assert(NumRegs == RC->getNumRegs() && "Bad regclass"); + + RDA = &getAnalysis(); + + DEBUG(dbgs() << "********** FIX EXECUTION DOMAIN: " + << TRI->getRegClassName(RC) << " **********\n"); + + // If no relevant registers are used in the function, we can skip it + // completely. + bool anyregs = false; + const MachineRegisterInfo &MRI = mf.getRegInfo(); + for (unsigned Reg : *RC) { + if (MRI.isPhysRegUsed(Reg)) { + anyregs = true; + break; + } + } + if (!anyregs) return false; + + // Initialize the AliasMap on the first use. + if (AliasMap.empty()) { + // Given a PhysReg, AliasMap[PhysReg] returns a list of indices into RC and + // therefore the LiveRegs array. + AliasMap.resize(TRI->getNumRegs()); + for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i) + for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true); + AI.isValid(); ++AI) + AliasMap[*AI].push_back(i); + } + + // Initialize the MBBOutRegsInfos + for (auto &MBB : mf) { + MBBOutRegsInfos.insert(std::make_pair(&MBB, nullptr)); + } + + // Traverse the basic blocks. 
+ LoopTraversal Traversal; + SmallVector TraversedMBBInfoOrder = + Traversal.traverse(mf); + for (auto TraversedMBB : TraversedMBBInfoOrder) { + processBasicBlock(TraversedMBB); + } + + for (auto MBBOutRegs : MBBOutRegsInfos) { + if (!MBBOutRegs.second) continue; for (unsigned i = 0, e = NumRegs; i != e; ++i) - if (FI->second.OutRegs[i].Value) - release(FI->second.OutRegs[i].Value); - delete[] FI->second.OutRegs; + if (MBBOutRegs.second[i].Value) + release(MBBOutRegs.second[i].Value); + delete[] MBBOutRegs.second; } - MBBInfos.clear(); - UndefReads.clear(); + MBBOutRegsInfos.clear(); Avail.clear(); Allocator.DestroyAll(); return false; } + +bool ReachingDefAnalysis::runOnMachineFunction(MachineFunction &mf) { + if (skipFunction(mf.getFunction())) + return false; + MF = &mf; + TII = MF->getSubtarget().getInstrInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); + + LiveRegs = nullptr; + NumRegUnits = TRI->getNumRegUnits(); + + MBBReachingDefs.resize(mf.getNumBlockIDs()); + + DEBUG(dbgs() << "********** REACHING DEFINITION ANALYSIS **********\n"); + + // Initialize the MBBOutRegsInfos + for (auto &MBB : mf) { + MBBOutRegsInfos.insert(std::make_pair(&MBB, nullptr)); + } + + // Traverse the basic blocks. + LoopTraversal Traversal; + SmallVector TraversedMBBInfoOrder = + Traversal.traverse(mf); + for (auto TraversedMBB : TraversedMBBInfoOrder) { + processBasicBlock(TraversedMBB); + } + + // Sorting all reaching defs found for a certain reg unit in a given BB. + for (MBBDefsInfo &MBBDefs : MBBReachingDefs) { + for (MBBRegUnitDefs &RegUnitDefs : MBBDefs) + std::sort(RegUnitDefs.begin(), RegUnitDefs.end()); + } + + return false; +} + +void ReachingDefAnalysis::releaseMemory() { + // Free the per-block live-out arrays and clear the analysis containers. 
+ for (auto MBBOutRegs : MBBOutRegsInfos) { + if (!MBBOutRegs.second) + continue; + delete[] MBBOutRegs.second; + } + MBBOutRegsInfos.clear(); + MBBReachingDefs.clear(); + InstIds.clear(); +} + +bool BreakFalseDeps::runOnMachineFunction(MachineFunction &mf) { + if (skipFunction(mf.getFunction())) + return false; + MF = &mf; + TII = MF->getSubtarget().getInstrInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); + RDA = &getAnalysis(); + + RegClassInfo.runOnMachineFunction(mf); + + DEBUG(dbgs() << "********** BREAK FALSE DEPENDENCIES **********\n"); + + // Traverse the basic blocks. + for (MachineBasicBlock &MBB : mf) { + processBasicBlock(&MBB); + } + + return false; +} + +int ReachingDefAnalysis::getReachingDef(MachineInstr *MI, int PhysReg) { + int InstId = InstIds[MI]; + int DefRes = -(1 << 20); + int MBBNumber = MI->getParent()->getNumber(); + int LatestDef = -(1 << 20); + for (MCRegUnitIterator Unit(PhysReg, TRI); Unit.isValid(); ++Unit) { + for (int Def : MBBReachingDefs[MBBNumber][*Unit]) { + if (Def >= InstId) + break; + DefRes = Def; + } + LatestDef = std::max(LatestDef, DefRes); + } + return LatestDef; +} + +int ReachingDefAnalysis::getClearance(MachineInstr *MI, MCPhysReg PhysReg) { + return InstIds[MI] - getReachingDef(MI, PhysReg); +} Index: llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp +++ llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp @@ -75,7 +75,7 @@ cl::desc("Enable the global merge pass")); namespace llvm { - void initializeARMExecutionDepsFixPass(PassRegistry&); + void initializeARMExecutionDomainFixPass(PassRegistry&); } extern "C" void LLVMInitializeARMTarget() { @@ -90,7 +90,7 @@ initializeARMLoadStoreOptPass(Registry); initializeARMPreAllocLoadStoreOptPass(Registry); initializeARMConstantIslandsPass(Registry); - initializeARMExecutionDepsFixPass(Registry); + initializeARMExecutionDomainFixPass(Registry); 
initializeARMExpandPseudoPass(Registry); initializeThumb2SizeReducePass(Registry); } @@ -355,20 +355,23 @@ void addPreEmitPass() override; }; -class ARMExecutionDepsFix : public ExecutionDepsFix { +class ARMExecutionDomainFix : public ExecutionDomainFix { public: static char ID; - ARMExecutionDepsFix() : ExecutionDepsFix(ID, ARM::DPRRegClass) {} + ARMExecutionDomainFix() : ExecutionDomainFix(ID, ARM::DPRRegClass) {} StringRef getPassName() const override { - return "ARM Execution Dependency Fix"; + return "ARM Execution Domain Fix"; } }; -char ARMExecutionDepsFix::ID; +char ARMExecutionDomainFix::ID; } // end anonymous namespace -INITIALIZE_PASS(ARMExecutionDepsFix, "arm-execution-deps-fix", - "ARM Execution Dependency Fix", false, false) +INITIALIZE_PASS_BEGIN(ARMExecutionDomainFix, "arm-execution-domain-fix", + "ARM Execution Domain Fix", false, false) +INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis) +INITIALIZE_PASS_END(ARMExecutionDomainFix, "arm-execution-domain-fix", + "ARM Execution Domain Fix", false, false) TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) { return new ARMPassConfig(*this, PM); @@ -462,7 +465,8 @@ if (EnableARMLoadStoreOpt) addPass(createARMLoadStoreOptimizationPass()); - addPass(new ARMExecutionDepsFix()); + addPass(new ARMExecutionDomainFix()); + addPass(new BreakFalseDeps()); } // Expand some pseudo instructions into multiple instructions to allow Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -20928,7 +20928,7 @@ SDValue Segment = DAG.getRegister(0, MVT::i32); // If source is undef or we know it won't be used, use a zero vector // to break register dependency. - // TODO: use undef instead and let ExecutionDepsFix deal with it? + // TODO: use undef instead and let BreakFalseDeps deal with it? 
if (Src.isUndef() || ISD::isBuildVectorAllOnes(Mask.getNode())) Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl); SDValue Ops[] = {Src, Base, Scale, Index, Disp, Segment, Mask, Chain}; @@ -20956,7 +20956,7 @@ SDValue Segment = DAG.getRegister(0, MVT::i32); // If source is undef or we know it won't be used, use a zero vector // to break register dependency. - // TODO: use undef instead and let ExecutionDepsFix deal with it? + // TODO: use undef instead and let BreakFalseDeps deal with it? if (Src.isUndef() || ISD::isBuildVectorAllOnes(VMask.getNode())) Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl); SDValue Ops[] = {Src, VMask, Base, Scale, Index, Disp, Segment, Chain}; Index: llvm/trunk/lib/Target/X86/X86InstrAVX512.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrAVX512.td +++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td @@ -439,7 +439,7 @@ // Alias instruction that maps zero vector to pxor / xorp* for AVX-512. // This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then -// swizzled by ExecutionDepsFix to pxor. +// swizzled by ExecutionDomainFix to pxor. // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-zeros value if folding it would be beneficial. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, Index: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp +++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp @@ -8105,7 +8105,7 @@ return false; } -/// Inform the ExecutionDepsFix pass how many idle +/// Inform the BreakFalseDeps pass how many idle /// instructions we would like before a partial register update. 
unsigned X86InstrInfo::getPartialRegUpdateClearance( const MachineInstr &MI, unsigned OpNum, @@ -8262,7 +8262,7 @@ return false; } -/// Inform the ExecutionDepsFix pass how many idle instructions we would like +/// Inform the BreakFalseDeps pass how many idle instructions we would like /// before certain undef register reads. /// /// This catches the VCVTSI2SD family of instructions: Index: llvm/trunk/lib/Target/X86/X86InstrSSE.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrSSE.td +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td @@ -360,7 +360,7 @@ // Alias instruction that maps zero vector to pxor / xorp* for sse. // This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then -// swizzled by ExecutionDepsFix to pxor. +// swizzled by ExecutionDomainFix to pxor. // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-zeros value if folding it would be beneficial. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, @@ -3218,7 +3218,7 @@ // which has a clobber before the rcp, vs. // vrcpss mem, %xmm0, %xmm0 // TODO: In theory, we could fold the load, and avoid the stall caused by - // the partial register store, either in ExecutionDepsFix or with smarter RA. + // the partial register store, either in BreakFalseDeps or with smarter RA. let Predicates = [target] in { def : Pat<(OpNode RC:$src), (!cast("V"#NAME#Suffix##r) (ScalarVT (IMPLICIT_DEF)), RC:$src)>; Index: llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp +++ llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp @@ -75,7 +75,7 @@ bool X86RegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const { - // ExecutionDepsFixer and PostRAScheduler require liveness. + // ExecutionDomainFix, BreakFalseDeps and PostRAScheduler require liveness. 
return true; } Index: llvm/trunk/lib/Target/X86/X86TargetMachine.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86TargetMachine.cpp +++ llvm/trunk/lib/Target/X86/X86TargetMachine.cpp @@ -60,7 +60,7 @@ void initializeFixupLEAPassPass(PassRegistry &); void initializeX86CallFrameOptimizationPass(PassRegistry &); void initializeX86CmovConverterPassPass(PassRegistry &); -void initializeX86ExecutionDepsFixPass(PassRegistry &); +void initializeX86ExecutionDomainFixPass(PassRegistry &); void initializeX86DomainReassignmentPass(PassRegistry &); } // end namespace llvm @@ -78,7 +78,7 @@ initializeFixupLEAPassPass(PR); initializeX86CallFrameOptimizationPass(PR); initializeX86CmovConverterPassPass(PR); - initializeX86ExecutionDepsFixPass(PR); + initializeX86ExecutionDomainFixPass(PR); initializeX86DomainReassignmentPass(PR); } @@ -342,20 +342,23 @@ void addPreSched2() override; }; -class X86ExecutionDepsFix : public ExecutionDepsFix { +class X86ExecutionDomainFix : public ExecutionDomainFix { public: static char ID; - X86ExecutionDepsFix() : ExecutionDepsFix(ID, X86::VR128XRegClass) {} + X86ExecutionDomainFix() : ExecutionDomainFix(ID, X86::VR128XRegClass) {} StringRef getPassName() const override { return "X86 Execution Dependency Fix"; } }; -char X86ExecutionDepsFix::ID; +char X86ExecutionDomainFix::ID; } // end anonymous namespace -INITIALIZE_PASS(X86ExecutionDepsFix, "x86-execution-deps-fix", - "X86 Execution Dependency Fix", false, false) +INITIALIZE_PASS_BEGIN(X86ExecutionDomainFix, "x86-execution-domain-fix", + "X86 Execution Domain Fix", false, false) +INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis) +INITIALIZE_PASS_END(X86ExecutionDomainFix, "x86-execution-domain-fix", + "X86 Execution Domain Fix", false, false) TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) { return new X86PassConfig(*this, PM); @@ -441,8 +444,10 @@ void X86PassConfig::addPreSched2() { 
addPass(createX86ExpandPseudoPass()); } void X86PassConfig::addPreEmitPass() { - if (getOptLevel() != CodeGenOpt::None) - addPass(new X86ExecutionDepsFix()); + if (getOptLevel() != CodeGenOpt::None) { + addPass(new X86ExecutionDomainFix()); + addPass(new BreakFalseDeps()); + } addPass(createX86IndirectBranchTrackingPass()); Index: llvm/trunk/test/CodeGen/ARM/deps-fix.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/deps-fix.ll +++ llvm/trunk/test/CodeGen/ARM/deps-fix.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -mcpu=cortex-a9 -mattr=+neon,+neonfp -float-abi=hard -mtriple armv7-linux-gnueabi | FileCheck %s -;; This test checks that the ExecutionDepsFix pass performs the domain changes +;; This test checks that the ExecutionDomainFix pass performs the domain changes ;; even when some dependencies are propagated through implicit definitions. ; CHECK: fun_a Index: llvm/trunk/test/CodeGen/X86/break-false-dep.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/break-false-dep.ll +++ llvm/trunk/test/CodeGen/X86/break-false-dep.ll @@ -67,7 +67,7 @@ ; SSE: for.body{{$}} ; ; This loop contains two cvtsi2ss instructions that update the same xmm -; register. Verify that the execution dependency fix pass breaks those +; register. Verify that the break false dependency fix pass breaks those ; dependencies by inserting xorps instructions. ; ; If the register allocator chooses different registers for the two cvtsi2ss @@ -141,7 +141,7 @@ ; This loop contains a cvtsi2sd instruction that has a loop-carried ; false dependency on an xmm that is modified by other scalar instructions ; that follow it in the loop. Additionally, the source of convert is a -; memory operand. Verify the execution dependency fix pass breaks this +; memory operand. Verify the break false dependency fix pass breaks this ; dependency by inserting a xor before the convert. 
@x = common global [1024 x double] zeroinitializer, align 16 @y = common global [1024 x double] zeroinitializer, align 16