Index: llvm/trunk/include/llvm/CodeGen/CalcSpillWeights.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/CalcSpillWeights.h +++ llvm/trunk/include/llvm/CodeGen/CalcSpillWeights.h @@ -66,6 +66,32 @@ /// \brief (re)compute li's spill weight and allocation hint. void calculateSpillWeightAndHint(LiveInterval &li); + + /// \brief Compute future expected spill weight of a split artifact of li + /// that will span between start and end slot indexes. + /// \param li The live interval to be split. + /// \param start The expected begining of the split artifact. Instructions + /// before start will not affect the weight. + /// \param end The expected end of the split artifact. Instructions + /// after end will not affect the weight. + /// \return The expected spill weight of the split artifact. Returns + /// negative weight for unspillable li. + float futureWeight(LiveInterval &li, SlotIndex start, SlotIndex end); + + /// \brief Helper function for weight calculations. + /// (Re)compute li's spill weight and allocation hint, or, for non null + /// start and end - compute future expected spill weight of a split + /// artifact of li that will span between start and end slot indexes. + /// \param li The live interval for which to compute the weight. + /// \param start The expected begining of the split artifact. Instructions + /// before start will not affect the weight. Relevant for + /// weight calculation of future split artifact. + /// \param end The expected end of the split artifact. Instructions + /// after end will not affect the weight. Relevant for + /// weight calculation of future split artifact. + /// \return The spill weight. Returns negative weight for unspillable li. + float weightCalcHelper(LiveInterval &li, SlotIndex *start = nullptr, + SlotIndex *end = nullptr); }; /// \brief Compute spill weights and allocation hints for all virtual register Index: llvm/trunk/include/llvm/CodeGen/LiveIntervalAnalysis.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/LiveIntervalAnalysis.h +++ llvm/trunk/include/llvm/CodeGen/LiveIntervalAnalysis.h @@ -107,6 +107,11 @@ const MachineBlockFrequencyInfo *MBFI, const MachineInstr &Instr); + /// Calculate the spill weight to assign to a single instruction. + static float getSpillWeight(bool isDef, bool isUse, + const MachineBlockFrequencyInfo *MBFI, + const MachineBasicBlock *MBB); + LiveInterval &getInterval(unsigned Reg) { if (hasInterval(Reg)) return *VirtRegIntervals[Reg]; Index: llvm/trunk/include/llvm/Target/TargetSubtargetInfo.h =================================================================== --- llvm/trunk/include/llvm/Target/TargetSubtargetInfo.h +++ llvm/trunk/include/llvm/Target/TargetSubtargetInfo.h @@ -221,6 +221,11 @@ /// a finer grain to tune the register allocator. virtual bool enableRALocalReassignment(CodeGenOpt::Level OptLevel) const; + /// \brief True if the subtarget should consider the cost of local intervals + /// created by a split candidate when choosing the best split candidate. This + /// heuristic may be compile time intensive. + virtual bool enableAdvancedRASplitCost() const; + /// \brief Enable use of alias analysis during code generation (during MI /// scheduling, DAGCombine, etc.). virtual bool useAA() const; Index: llvm/trunk/lib/CodeGen/CalcSpillWeights.cpp =================================================================== --- llvm/trunk/lib/CodeGen/CalcSpillWeights.cpp +++ llvm/trunk/lib/CodeGen/CalcSpillWeights.cpp @@ -133,8 +133,21 @@ return true; } -void -VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { +void VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { + float weight = weightCalcHelper(li); + // Check if unspillable. + if (weight < 0) + return; + li.weight = weight; +} + +float VirtRegAuxInfo::futureWeight(LiveInterval &li, SlotIndex start, + SlotIndex end) { + return weightCalcHelper(li, &start, &end); +} + +float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start, + SlotIndex *end) { MachineRegisterInfo &mri = MF.getRegInfo(); const TargetRegisterInfo &tri = *MF.getSubtarget().getRegisterInfo(); MachineBasicBlock *mbb = nullptr; @@ -154,10 +167,38 @@ // Don't recompute spill weight for an unspillable register. bool Spillable = li.isSpillable(); + bool localSplitArtifact = start && end; + + // Do not update future local split artifacts. + bool updateLI = !localSplitArtifact; + + if (localSplitArtifact) { + MachineBasicBlock *localMBB = LIS.getMBBFromIndex(*end); + assert(localMBB == LIS.getMBBFromIndex(*start) && + "start and end are expected to be in the same basic block"); + + // Local split artifact will have 2 additional copy instructions and they + // will be in the same BB. + // localLI = COPY other + // ... + // other = COPY localLI + totalWeight += LiveIntervals::getSpillWeight(true, false, &MBFI, localMBB); + totalWeight += LiveIntervals::getSpillWeight(false, true, &MBFI, localMBB); + + numInstr += 2; + } + for (MachineRegisterInfo::reg_instr_iterator I = mri.reg_instr_begin(li.reg), E = mri.reg_instr_end(); I != E; ) { MachineInstr *mi = &*(I++); + + // For local split artifacts, we are interested only in instructions between + // the expected start and end of the range. + SlotIndex si = LIS.getInstructionIndex(*mi); + if (localSplitArtifact && ((si < *start) || (si > *end))) + continue; + numInstr++; if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugValue()) continue; @@ -212,23 +253,25 @@ Hint.clear(); // Always prefer the physreg hint. - if (unsigned hint = hintPhys ? hintPhys : hintVirt) { - mri.setRegAllocationHint(li.reg, 0, hint); - // Weakly boost the spill weight of hinted registers. - totalWeight *= 1.01F; + if (updateLI) { + if (unsigned hint = hintPhys ? hintPhys : hintVirt) { + mri.setRegAllocationHint(li.reg, 0, hint); + // Weakly boost the spill weight of hinted registers. + totalWeight *= 1.01F; + } } // If the live interval was already unspillable, leave it that way. if (!Spillable) - return; + return -1.0; // Mark li as unspillable if all live ranges are tiny and the interval // is not live at any reg mask. If the interval is live at a reg mask // spilling may be required. - if (li.isZeroLength(LIS.getSlotIndexes()) && + if (updateLI && li.isZeroLength(LIS.getSlotIndexes()) && !li.isLiveAtIndexes(LIS.getRegMaskSlots())) { li.markNotSpillable(); - return; + return -1.0; } // If all of the definitions of the interval are re-materializable, @@ -238,5 +281,7 @@ if (isRematerializable(li, LIS, VRM, *MF.getSubtarget().getInstrInfo())) totalWeight *= 0.5F; - li.weight = normalize(totalWeight, li.getSize(), numInstr); + if (localSplitArtifact) + return normalize(totalWeight, start->distance(*end), numInstr); + return normalize(totalWeight, li.getSize(), numInstr); } Index: llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp =================================================================== --- llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp +++ llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -824,7 +824,13 @@ float LiveIntervals::getSpillWeight(bool isDef, bool isUse, const MachineBlockFrequencyInfo *MBFI, const MachineInstr &MI) { - BlockFrequency Freq = MBFI->getBlockFreq(MI.getParent()); + return getSpillWeight(isDef, isUse, MBFI, MI.getParent()); +} + +float LiveIntervals::getSpillWeight(bool isDef, bool isUse, + const MachineBlockFrequencyInfo *MBFI, + const MachineBasicBlock *MBB) { + BlockFrequency Freq = MBFI->getBlockFreq(MBB); const float Scale = 1.0f / MBFI->getEntryFreq(); return (isDef + isUse) * (Freq.getFrequency() * Scale); } Index: llvm/trunk/lib/CodeGen/RegAllocGreedy.cpp =================================================================== --- llvm/trunk/lib/CodeGen/RegAllocGreedy.cpp +++ llvm/trunk/lib/CodeGen/RegAllocGreedy.cpp @@ -23,6 +23,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -129,6 +130,12 @@ cl::desc("Cost for first time use of callee-saved register."), cl::init(0), cl::Hidden); +static cl::opt ConsiderLocalIntervalCost( + "condsider-local-interval-cost", cl::Hidden, + cl::desc("Consider the cost of local intervals created by a split " + "candidate when choosing the best split candidate."), + cl::init(false)); + static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", createGreedyRegisterAllocator); @@ -277,6 +284,57 @@ } }; + /// EvictionTrack - Keeps track of past evictions in order to optimize region + /// split decision. + class EvictionTrack { + + public: + using EvictorInfo = + std::pair; + using EvicteeInfo = llvm::MapVector; + + private: + /// Each Vreg that has been evicted in the last stage of selectOrSplit will + /// be mapped to the evictor Vreg and the PhysReg it was evicted from. + EvicteeInfo Evictees; + + public: + /// \brief Clear all eviction information. + void clear() { Evictees.clear(); } + + /// \brief Clear eviction information for the given evictee Vreg. + /// E.g. when Vreg get's a new allocation, the old eviction info is no + /// longer relevant. + /// \param Evictee The evictee Vreg for whom we want to clear collected + /// eviction info. + void clearEvicteeInfo(unsigned Evictee) { Evictees.erase(Evictee); } + + /// \brief Track new eviction. + /// The Evictor vreg has evicted the Evictee vreg from Physreg. + /// \praram PhysReg The phisical register Evictee was evicted from. + /// \praram Evictor The evictor Vreg that evicted Evictee. + /// \praram Evictee The evictee Vreg. + void addEviction(unsigned PhysReg, unsigned Evictor, unsigned Evictee) { + Evictees[Evictee].first = Evictor; + Evictees[Evictee].second = PhysReg; + } + + /// Return the Evictor Vreg which evicted Evictee Vreg from PhysReg. + /// \praram Evictee The evictee vreg. + /// \return The Evictor vreg which evicted Evictee vreg from PhysReg. 0 if + /// nobody has evicted Evictee from PhysReg. + EvictorInfo getEvictor(unsigned Evictee) { + if (Evictees.count(Evictee)) { + return Evictees[Evictee]; + } + + return EvictorInfo(0, 0); + } + }; + + // Keeps track of past evictions in order to optimize region split decision. + EvictionTrack LastEvicted; + // splitting state. std::unique_ptr SA; std::unique_ptr SE; @@ -340,6 +398,10 @@ /// obtained from the TargetSubtargetInfo. bool EnableLocalReassign; + /// Enable or not the the consideration of the cost of local intervals created + /// by a split candidate when choosing the best split candidate. + bool EnableAdvancedRASplitCost; + /// Set of broken hints that may be reconciled later because of eviction. SmallSetVector SetOfBrokenHints; @@ -382,13 +444,24 @@ bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&); void addThroughConstraints(InterferenceCache::Cursor, ArrayRef); void growRegion(GlobalSplitCandidate &Cand); - BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate&); + bool splitCanCauseEvictionChain(unsigned Evictee, GlobalSplitCandidate &Cand, + unsigned BBNumber, + const AllocationOrder &Order); + BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate &, + const AllocationOrder &Order, + bool *CanCauseEvictionChain); bool calcCompactRegion(GlobalSplitCandidate&); void splitAroundRegion(LiveRangeEdit&, ArrayRef); void calcGapWeights(unsigned, SmallVectorImpl&); unsigned canReassign(LiveInterval &VirtReg, unsigned PhysReg); bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool); bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&); + bool canEvictInterferenceInRange(LiveInterval &VirtReg, unsigned PhysReg, + SlotIndex Start, SlotIndex End, + EvictionCost &MaxCost); + unsigned getCheapestEvicteeWeight(const AllocationOrder &Order, + LiveInterval &VirtReg, SlotIndex Start, + SlotIndex End, float *BestEvictWeight); void evictInterference(LiveInterval&, unsigned, SmallVectorImpl&); bool mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg, @@ -405,7 +478,8 @@ unsigned calculateRegionSplitCost(LiveInterval &VirtReg, AllocationOrder &Order, BlockFrequency &BestCost, - unsigned &NumCands, bool IgnoreCSR); + unsigned &NumCands, bool IgnoreCSR, + bool *CanCauseEvictionChain = nullptr); /// Perform region splitting. unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, bool HasCompact, @@ -859,6 +933,92 @@ return true; } +/// \brief Return true if all interferences between VirtReg and PhysReg between +/// Start and End can be evicted. +/// +/// \param VirtReg Live range that is about to be assigned. +/// \param PhysReg Desired register for assignment. +/// \param Start Start of range to look for interferences. +/// \param End End of range to look for interferences. +/// \param MaxCost Only look for cheaper candidates and update with new cost +/// when returning true. +/// \return True when interference can be evicted cheaper than MaxCost. +bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg, + unsigned PhysReg, SlotIndex Start, + SlotIndex End, + EvictionCost &MaxCost) { + EvictionCost Cost; + + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); + + // Check if any interfering live range is heavier than MaxWeight. + for (unsigned i = Q.interferingVRegs().size(); i; --i) { + LiveInterval *Intf = Q.interferingVRegs()[i - 1]; + + // Check if interference overlast the segment in interest. + if (!Intf->overlaps(Start, End)) + continue; + + // Cannot evict non virtual reg interference. + if (!TargetRegisterInfo::isVirtualRegister(Intf->reg)) + return false; + // Never evict spill products. They cannot split or spill. + if (getStage(*Intf) == RS_Done) + return false; + + // Would this break a satisfied hint? + bool BreaksHint = VRM->hasPreferredPhys(Intf->reg); + // Update eviction cost. + Cost.BrokenHints += BreaksHint; + Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight); + // Abort if this would be too expensive. + if (!(Cost < MaxCost)) + return false; + } + } + + if (Cost.MaxWeight == 0) + return false; + + MaxCost = Cost; + return true; +} + +/// \brief Return tthe physical register that will be best +/// candidate for eviction by a local split interval that will be created +/// between Start and End. +/// +/// \param Order The allocation order +/// \param VirtReg Live range that is about to be assigned. +/// \param Start Start of range to look for interferences +/// \param End End of range to look for interferences +/// \param BestEvictweight The eviction cost of that eviction +/// \return The PhysReg which is the best candidate for eviction and the +/// eviction cost in BestEvictweight +unsigned RAGreedy::getCheapestEvicteeWeight(const AllocationOrder &Order, + LiveInterval &VirtReg, + SlotIndex Start, SlotIndex End, + float *BestEvictweight) { + EvictionCost BestEvictCost; + BestEvictCost.setMax(); + BestEvictCost.MaxWeight = VirtReg.weight; + unsigned BestEvicteePhys = 0; + + // Go over all physical registers and find the best candidate for eviction + for (auto PhysReg : Order.getOrder()) { + + if (!canEvictInterferenceInRange(VirtReg, PhysReg, Start, End, + BestEvictCost)) + continue; + + // Best so far. + BestEvicteePhys = PhysReg; + } + *BestEvictweight = BestEvictCost.MaxWeight; + return BestEvicteePhys; +} + /// evictInterference - Evict any interferring registers that prevent VirtReg /// from being assigned to Physreg. This assumes that canEvictInterference /// returned true. @@ -893,6 +1053,9 @@ // The same VirtReg may be present in multiple RegUnits. Skip duplicates. if (!VRM->hasPhys(Intf->reg)) continue; + + LastEvicted.addEviction(PhysReg, VirtReg.reg, Intf->reg); + Matrix->unassign(*Intf); assert((ExtraRegInfo[Intf->reg].Cascade < Cascade || VirtReg.isSpillable() < Intf->isSpillable()) && @@ -1214,13 +1377,117 @@ return Cost; } +/// \brief Check if splitting Evictee will create a local split interval in +/// basic block number BBNumber that may cause a bad eviction chain. This is +/// intended to prevent bad eviction sequences like: +/// movl %ebp, 8(%esp) # 4-byte Spill +/// movl %ecx, %ebp +/// movl %ebx, %ecx +/// movl %edi, %ebx +/// movl %edx, %edi +/// cltd +/// idivl %esi +/// movl %edi, %edx +/// movl %ebx, %edi +/// movl %ecx, %ebx +/// movl %ebp, %ecx +/// movl 16(%esp), %ebp # 4 - byte Reload +/// +/// Such sequences are created in 2 scenarios: +/// +/// Scenario #1: +/// vreg0 is evicted from physreg0 by vreg1. +/// Evictee vreg0 is intended for region splitting with split candidate +/// physreg0 (the reg vreg0 was evicted from). +/// Region splitting creates a local interval because of interference with the +/// evictor vreg1 (normally region spliitting creates 2 interval, the "by reg" +/// and "by stack" intervals and local interval created when interference +/// occurs). +/// One of the split intervals ends up evicting vreg2 from physreg1. +/// Evictee vreg2 is intended for region splitting with split candidate +/// physreg1. +/// One of the split intervals ends up evicting vreg3 from physreg2, etc. +/// +/// Scenario #2 +/// vreg0 is evicted from physreg0 by vreg1. +/// vreg2 is evicted from physreg2 by vreg3 etc. +/// Evictee vreg0 is intended for region splitting with split candidate +/// physreg1. +/// Region splitting creates a local interval because of interference with the +/// evictor vreg1. +/// One of the split intervals ends up evicting back original evictor vreg1 +/// from physreg0 (the reg vreg0 was evicted from). +/// Another evictee vreg2 is intended for region splitting with split candidate +/// physreg1. +/// One of the split intervals ends up evicting vreg3 from physreg2, etc. +/// +/// \param Evictee The register considered to be split. +/// \param Cand The split candidate that determines the physical register +/// we are splitting for and the interferences. +/// \param BBNumber The number of a BB for which the region split process will +/// create a local split interval. +/// \param Order The phisical registers that may get evicted by a split +/// artifact of Evictee. +/// \return True if splitting Evictee may cause a bad eviction chain, false +/// otherwise. +bool RAGreedy::splitCanCauseEvictionChain(unsigned Evictee, + GlobalSplitCandidate &Cand, + unsigned BBNumber, + const AllocationOrder &Order) { + EvictionTrack::EvictorInfo VregEvictorInfo = LastEvicted.getEvictor(Evictee); + unsigned Evictor = VregEvictorInfo.first; + unsigned PhysReg = VregEvictorInfo.second; + + // No actual evictor. + if (!Evictor || !PhysReg) + return false; + + float MaxWeight = 0; + unsigned FutureEvictedPhysReg = + getCheapestEvicteeWeight(Order, LIS->getInterval(Evictee), + Cand.Intf.first(), Cand.Intf.last(), &MaxWeight); + + // The bad eviction chain occurs when either the split candidate the the + // evited reg or one of the split artifact will evict the evicting reg. + if ((PhysReg != Cand.PhysReg) && (PhysReg != FutureEvictedPhysReg)) + return false; + + Cand.Intf.moveToBlock(BBNumber); + + // Check to see if the Evictor contains interference (with Evictee) in the + // given BB. If so, this interference caused the eviction of Evictee from + // PhysReg. This suggest that we will create a local interval during the + // region split to avoid this interference This local interval may cause a bad + // eviction chain. + if (!LIS->hasInterval(Evictor)) + return false; + LiveInterval &EvictorLI = LIS->getInterval(Evictor); + if (EvictorLI.FindSegmentContaining(Cand.Intf.first()) == EvictorLI.end()) + return false; + + // Now, check to see if the local interval we will create is going to be + // expensive enough to evict somebody If so, this may cause a bad eviction + // chain. + VirtRegAuxInfo VRAI(*MF, *LIS, VRM, getAnalysis(), *MBFI); + float splitArtifactWeight = + VRAI.futureWeight(LIS->getInterval(Evictee), + Cand.Intf.first().getPrevIndex(), Cand.Intf.last()); + if (splitArtifactWeight >= 0 && splitArtifactWeight < MaxWeight) + return false; + + return true; +} + /// calcGlobalSplitCost - Return the global split cost of following the split /// pattern in LiveBundles. This cost should be added to the local cost of the /// interference pattern in SplitConstraints. /// -BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { +BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand, + const AllocationOrder &Order, + bool *CanCauseEvictionChain) { BlockFrequency GlobalCost = 0; const BitVector &LiveBundles = Cand.LiveBundles; + unsigned VirtRegToSplit = SA->getParent().reg; ArrayRef UseBlocks = SA->getUseBlocks(); for (unsigned i = 0; i != UseBlocks.size(); ++i) { const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; @@ -1229,6 +1496,24 @@ bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, true)]; unsigned Ins = 0; + Cand.Intf.moveToBlock(BC.Number); + // Check wheather a local interval is going to be created during the region + // split. + if (EnableAdvancedRASplitCost && CanCauseEvictionChain && + Cand.Intf.hasInterference() && BI.LiveIn && BI.LiveOut && RegIn && + RegOut) { + + if (splitCanCauseEvictionChain(VirtRegToSplit, Cand, BC.Number, Order)) { + // This interfernce cause our eviction from this assignment, we might + // evict somebody else, add that cost. + // See splitCanCauseEvictionChain for detailed description of scenarios. + GlobalCost += SpillPlacer->getBlockFrequency(BC.Number); + GlobalCost += SpillPlacer->getBlockFrequency(BC.Number); + + *CanCauseEvictionChain = true; + } + } + if (BI.LiveIn) Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg); if (BI.LiveOut) @@ -1249,6 +1534,20 @@ if (Cand.Intf.hasInterference()) { GlobalCost += SpillPlacer->getBlockFrequency(Number); GlobalCost += SpillPlacer->getBlockFrequency(Number); + + // Check wheather a local interval is going to be created during the + // region split. + if (EnableAdvancedRASplitCost && CanCauseEvictionChain && + splitCanCauseEvictionChain(VirtRegToSplit, Cand, Number, Order)) { + // This interfernce cause our eviction from this assignment, we might + // evict somebody else, add that cost. + // See splitCanCauseEvictionChain for detailed description of + // scenarios. + GlobalCost += SpillPlacer->getBlockFrequency(Number); + GlobalCost += SpillPlacer->getBlockFrequency(Number); + + *CanCauseEvictionChain = true; + } } continue; } @@ -1413,6 +1712,7 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl &NewVRegs) { unsigned NumCands = 0; + BlockFrequency SpillCost = calcSpillCost(); BlockFrequency BestCost; // Check if we can split this live range around a compact region. @@ -1424,14 +1724,24 @@ } else { // No benefit from the compact region, our fallback will be per-block // splitting. Make sure we find a solution that is cheaper than spilling. - BestCost = calcSpillCost(); + BestCost = SpillCost; DEBUG(dbgs() << "Cost of isolating all blocks = "; MBFI->printBlockFreq(dbgs(), BestCost) << '\n'); } + bool CanCauseEvictionChain = false; unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost, NumCands, - false/*IgnoreCSR*/); + false /*IgnoreCSR*/, &CanCauseEvictionChain); + + // Split candidates with compact regions can cause a bad eviction sequence. + // See splitCanCauseEvictionChain for detailed description of scenarios. + // To avoid it, we need to comapre the cost with the spill cost and not the + // current max frequency. + if (HasCompact && (BestCost > SpillCost) && (BestCand != NoCand) && + CanCauseEvictionChain) { + return 0; + } // No solutions found, fall back to single block splitting. if (!HasCompact && BestCand == NoCand) @@ -1443,8 +1753,8 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, AllocationOrder &Order, BlockFrequency &BestCost, - unsigned &NumCands, - bool IgnoreCSR) { + unsigned &NumCands, bool IgnoreCSR, + bool *CanCauseEvictionChain) { unsigned BestCand = NoCand; Order.rewind(); while (unsigned PhysReg = Order.next()) { @@ -1504,7 +1814,8 @@ continue; } - Cost += calcGlobalSplitCost(Cand); + bool HasEvictionChain = false; + Cost += calcGlobalSplitCost(Cand, Order, &HasEvictionChain); DEBUG({ dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost) << " with bundles"; @@ -1515,9 +1826,24 @@ if (Cost < BestCost) { BestCand = NumCands; BestCost = Cost; + // See splitCanCauseEvictionChain for detailed description of bad + // eviction chain scenarios. + if (CanCauseEvictionChain) + *CanCauseEvictionChain = HasEvictionChain; } ++NumCands; } + + if (CanCauseEvictionChain && BestCand != NoCand) { + // See splitCanCauseEvictionChain for detailed description of bad + // eviction chain scenarios. + DEBUG(dbgs() << "Best split candidate of vreg " + << PrintReg(VirtReg.reg, TRI) << " may "); + if (!(*CanCauseEvictionChain)) + DEBUG(dbgs() << "not "); + DEBUG(dbgs() << "cause bad eviction chain\n"); + } + return BestCand; } @@ -2580,6 +2906,8 @@ // First try assigning a free register. AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix); if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) { + // If VirtReg got an assignment, the eviction info is no longre relevant. + LastEvicted.clearEvicteeInfo(VirtReg.reg); // When NewVRegs is not empty, we may have made decisions such as evicting // a virtual register, go with the earlier decisions and use the physical // register. @@ -2613,6 +2941,9 @@ // copy-related live-ranges. if (Hint && Hint != PhysReg) SetOfBrokenHints.insert(&VirtReg); + // If VirtReg eviction someone, the eviction info for it as an evictee is + // no longre relevant. + LastEvicted.clearEvicteeInfo(VirtReg.reg); return PhysReg; } @@ -2632,8 +2963,11 @@ // Try splitting VirtReg or interferences. unsigned NewVRegSizeBefore = NewVRegs.size(); unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs); - if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore)) + if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore)) { + // If VirtReg got split, the eviction info is no longre relevant. + LastEvicted.clearEvicteeInfo(VirtReg.reg); return PhysReg; + } } // If we couldn't allocate a register from spilling, there is probably some @@ -2747,6 +3081,9 @@ MF->getSubtarget().enableRALocalReassignment( MF->getTarget().getOptLevel()); + EnableAdvancedRASplitCost = ConsiderLocalIntervalCost || + MF->getSubtarget().enableAdvancedRASplitCost(); + if (VerifyEnabled) MF->verify(this, "Before greedy register allocator"); @@ -2778,6 +3115,7 @@ IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI); GlobalCand.resize(32); // This will grow as needed. SetOfBrokenHints.clear(); + LastEvicted.clear(); allocatePhysRegs(); tryHintsRecoloring(); Index: llvm/trunk/lib/CodeGen/TargetSubtargetInfo.cpp =================================================================== --- llvm/trunk/lib/CodeGen/TargetSubtargetInfo.cpp +++ llvm/trunk/lib/CodeGen/TargetSubtargetInfo.cpp @@ -51,6 +51,10 @@ return true; } +bool TargetSubtargetInfo::enableAdvancedRASplitCost() const { + return false; +} + bool TargetSubtargetInfo::enablePostRAScheduler() const { return getSchedModel().PostRAScheduler; } Index: llvm/trunk/lib/Target/X86/X86Subtarget.h =================================================================== --- llvm/trunk/lib/Target/X86/X86Subtarget.h +++ llvm/trunk/lib/Target/X86/X86Subtarget.h @@ -672,6 +672,10 @@ AntiDepBreakMode getAntiDepBreakMode() const override { return TargetSubtargetInfo::ANTIDEP_CRITICAL; } + + virtual bool enableAdvancedRASplitCost() const { + return true; + } }; } // end namespace llvm Index: llvm/trunk/test/CodeGen/X86/bug26810.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/bug26810.ll +++ llvm/trunk/test/CodeGen/X86/bug26810.ll @@ -0,0 +1,312 @@ +; RUN: llc < %s -march=x86 -regalloc=greedy -stop-after=greedy | FileCheck %s +; Make sure bad eviction sequence doesnt occur + +; Fix for bugzilla 26810. +; This test is meant to make sure bad eviction sequence like the one described +; below does not occur +; +; movapd %xmm7, 160(%esp) # 16-byte Spill +; movapd %xmm5, %xmm7 +; movapd %xmm4, %xmm5 +; movapd %xmm3, %xmm4 +; movapd %xmm2, %xmm3 +; some_inst +; movapd %xmm3, %xmm2 +; movapd %xmm4, %xmm3 +; movapd %xmm5, %xmm4 +; movapd %xmm7, %xmm5 +; movapd 160(%esp), %xmm7 # 16-byte Reload + +; Make sure we have no redundant copies in the problematic code section +; CHECK-LABEL: name: loop +; CHECK: bb.2.for.body: +; CHECK: SUBPDrr +; CHECK-NEXT: MOVAPSmr +; CHECK-NEXT: MOVAPSrm +; CHECK-NEXT: MULPDrm +; CHECK-NEXT: ADDPDrr +; CHECK-NEXT: ADD32ri8 + +target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" +target triple = "i386-pc-linux-gnu" + +%struct._iobuf = type { i8* } + +$"\01??_C@_01NOFIACDB@w?$AA@" = comdat any + +$"\01??_C@_09LAIDGMDM@?1dev?1null?$AA@" = comdat any + +@"\01?v@@3PAU__m128d@@A" = global [8 x <2 x double>] zeroinitializer, align 16 +@"\01?m1@@3PAU__m128d@@A" = local_unnamed_addr global [76800000 x <2 x double>] zeroinitializer, align 16 +@"\01?m2@@3PAU__m128d@@A" = local_unnamed_addr global [8 x <2 x double>] zeroinitializer, align 16 +@"\01??_C@_01NOFIACDB@w?$AA@" = linkonce_odr unnamed_addr constant [2 x i8] c"w\00", comdat, align 1 +@"\01??_C@_09LAIDGMDM@?1dev?1null?$AA@" = linkonce_odr unnamed_addr constant [10 x i8] c"/dev/null\00", comdat, align 1 + +; Function Attrs: norecurse +define i32 @main() local_unnamed_addr #0 { +entry: + tail call void @init() + %0 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 0), align 16, !tbaa !8 + %1 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 1), align 16, !tbaa !8 + %2 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 2), align 16, !tbaa !8 + %3 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 3), align 16, !tbaa !8 + %4 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 4), align 16, !tbaa !8 + %5 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 5), align 16, !tbaa !8 + %6 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 6), align 16, !tbaa !8 + %7 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 7), align 16, !tbaa !8 + %.promoted.i = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 0), align 16, !tbaa !8 + %.promoted51.i = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 1), align 16, !tbaa !8 + %.promoted53.i = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 2), align 16, !tbaa !8 + %.promoted55.i = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 3), align 16, !tbaa !8 + %.promoted57.i = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 4), align 16, !tbaa !8 + %.promoted59.i = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 5), align 16, !tbaa !8 + %.promoted61.i = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 6), align 16, !tbaa !8 + %.promoted63.i = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 7), align 16, !tbaa !8 + br label %for.body.i + +for.body.i: ; preds = %for.body.i, %entry + %add.i64.i = phi <2 x double> [ %.promoted63.i, %entry ], [ %add.i.i, %for.body.i ] + %add.i3662.i = phi <2 x double> [ %.promoted61.i, %entry ], [ %add.i36.i, %for.body.i ] + %add.i3860.i = phi <2 x double> [ %.promoted59.i, %entry ], [ %add.i38.i, %for.body.i ] + %add.i4058.i = phi <2 x double> [ %.promoted57.i, %entry ], [ %add.i40.i, %for.body.i ] + %add.i4256.i = phi <2 x double> [ %.promoted55.i, %entry ], [ %add.i42.i, %for.body.i ] + %add.i4454.i = phi <2 x double> [ %.promoted53.i, %entry ], [ %add.i44.i, %for.body.i ] + %add.i4652.i = phi <2 x double> [ %.promoted51.i, %entry ], [ %add.i46.i, %for.body.i ] + %add.i4850.i = phi <2 x double> [ %.promoted.i, %entry ], [ %add.i48.i, %for.body.i ] + %i.049.i = phi i32 [ 0, %entry ], [ %inc.i, %for.body.i ] + %arrayidx.i = getelementptr inbounds [76800000 x <2 x double>], [76800000 x <2 x double>]* @"\01?m1@@3PAU__m128d@@A", i32 0, i32 %i.049.i + %8 = load <2 x double>, <2 x double>* %arrayidx.i, align 16, !tbaa !8 + %mul.i.i = fmul <2 x double> %0, %8 + %add.i48.i = fadd <2 x double> %add.i4850.i, %mul.i.i + %mul.i47.i = fmul <2 x double> %1, %8 + %add.i46.i = fadd <2 x double> %add.i4652.i, %mul.i47.i + %mul.i45.i = fmul <2 x double> %2, %8 + %add.i44.i = fadd <2 x double> %add.i4454.i, %mul.i45.i + %mul.i43.i = fmul <2 x double> %3, %8 + %add.i42.i = fadd <2 x double> %add.i4256.i, %mul.i43.i + %mul.i41.i = fmul <2 x double> %4, %8 + %add.i40.i = fadd <2 x double> %add.i4058.i, %mul.i41.i + %mul.i39.i = fmul <2 x double> %5, %8 + %add.i38.i = fadd <2 x double> %add.i3860.i, %mul.i39.i + %mul.i37.i = fmul <2 x double> %6, %8 + %add.i36.i = fsub <2 x double> %add.i3662.i, %mul.i37.i + %mul.i35.i = fmul <2 x double> %7, %8 + %add.i.i = fadd <2 x double> %add.i64.i, %mul.i35.i + %inc.i = add nuw nsw i32 %i.049.i, 1 + %exitcond.i = icmp eq i32 %inc.i, 76800000 + br i1 %exitcond.i, label %loop.exit, label %for.body.i + +loop.exit: ; preds = %for.body.i + store <2 x double> %add.i48.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 0), align 16, !tbaa !8 + store <2 x double> %add.i46.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 1), align 16, !tbaa !8 + store <2 x double> %add.i46.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 1), align 16, !tbaa !8 + store <2 x double> %add.i44.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 2), align 16, !tbaa !8 + store <2 x double> %add.i42.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 3), align 16, !tbaa !8 + store <2 x double> %add.i40.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 4), align 16, !tbaa !8 + store <2 x double> %add.i38.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 5), align 16, !tbaa !8 + store <2 x double> %add.i36.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 6), align 16, !tbaa !8 + store <2 x double> %add.i.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 7), align 16, !tbaa !8 + %call.i = tail call %struct._iobuf* @fopen(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @"\01??_C@_09LAIDGMDM@?1dev?1null?$AA@", i32 0, i32 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @"\01??_C@_01NOFIACDB@w?$AA@", i32 0, i32 0)) #7 + %call1.i = tail call i32 @fwrite(i8* bitcast ([8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A" to i8*), i32 16, i32 8, %struct._iobuf* %call.i) #7 + %call2.i = tail call i32 @fclose(%struct._iobuf* %call.i) #7 + ret i32 0 +} + +define void @init() local_unnamed_addr #1 { +entry: + call void @llvm.memset.p0i8.i32(i8* bitcast ([8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A" to i8*), i8 0, i32 128, i32 16, i1 false) + %call.i = tail call i64 @_time64(i64* null) + %conv = trunc i64 %call.i to i32 + tail call void @srand(i32 %conv) + br label %for.body6 + +for.body6: ; preds = %for.body6, %entry + %i2.051 = phi i32 [ 0, %entry ], [ %inc14, %for.body6 ] + %call7 = tail call i32 @rand() + %conv8 = sitofp i32 %call7 to double + %tmp.sroa.0.0.vec.insert = insertelement <2 x double> undef, double %conv8, i32 0 + %call9 = tail call i32 @rand() + %conv10 = sitofp i32 %call9 to double + %tmp.sroa.0.8.vec.insert = insertelement <2 x double> %tmp.sroa.0.0.vec.insert, double %conv10, i32 1 + %arrayidx12 = getelementptr inbounds [76800000 x <2 x double>], [76800000 x <2 x double>]* @"\01?m1@@3PAU__m128d@@A", i32 0, i32 %i2.051 + store <2 x double> %tmp.sroa.0.8.vec.insert, <2 x double>* %arrayidx12, align 16, !tbaa !8 + %inc14 = add nuw nsw i32 %i2.051, 1 + %exitcond = icmp eq i32 %inc14, 76800000 + br i1 %exitcond, label %for.body21.preheader, label %for.body6 + +for.body21.preheader: ; preds = %for.body6 + %call25 = tail call i32 @rand() + %conv26 = sitofp i32 %call25 to double + %tmp23.sroa.0.0.vec.insert = insertelement <2 x double> undef, double %conv26, i32 0 + %call28 = tail call i32 @rand() + %conv29 = sitofp i32 %call28 to double + %tmp23.sroa.0.8.vec.insert = insertelement <2 x double> %tmp23.sroa.0.0.vec.insert, double %conv29, i32 1 + store <2 x double> %tmp23.sroa.0.8.vec.insert, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 0), align 16, !tbaa !8 + %call25.1 = tail call i32 @rand() + %conv26.1 = sitofp i32 %call25.1 to double + %tmp23.sroa.0.0.vec.insert.1 = insertelement <2 x double> undef, double %conv26.1, i32 0 + %call28.1 = tail call i32 @rand() + %conv29.1 = sitofp i32 %call28.1 to double + %tmp23.sroa.0.8.vec.insert.1 = insertelement <2 x double> %tmp23.sroa.0.0.vec.insert.1, double %conv29.1, i32 1 + store <2 x double> %tmp23.sroa.0.8.vec.insert.1, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 1), align 16, !tbaa !8 + %call25.2 = tail call i32 @rand() + %conv26.2 = sitofp i32 %call25.2 to double + %tmp23.sroa.0.0.vec.insert.2 = insertelement <2 x double> undef, double %conv26.2, i32 0 + %call28.2 = tail call i32 @rand() + %conv29.2 = sitofp i32 %call28.2 to double + %tmp23.sroa.0.8.vec.insert.2 = insertelement <2 x double> %tmp23.sroa.0.0.vec.insert.2, double %conv29.2, i32 1 + store <2 x double> %tmp23.sroa.0.8.vec.insert.2, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 2), align 16, !tbaa !8 + %call25.3 = tail call i32 @rand() + %conv26.3 = sitofp i32 %call25.3 to double + %tmp23.sroa.0.0.vec.insert.3 = insertelement <2 x double> undef, double %conv26.3, i32 0 + %call28.3 = tail call i32 @rand() + %conv29.3 = sitofp i32 %call28.3 to double + %tmp23.sroa.0.8.vec.insert.3 = insertelement <2 x double> %tmp23.sroa.0.0.vec.insert.3, double %conv29.3, i32 1 + store <2 x double> %tmp23.sroa.0.8.vec.insert.3, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 3), align 16, !tbaa !8 + %call25.4 = tail call i32 @rand() + %conv26.4 = sitofp i32 %call25.4 to double + %tmp23.sroa.0.0.vec.insert.4 = insertelement <2 x double> undef, double %conv26.4, i32 0 + %call28.4 = tail call i32 @rand() + %conv29.4 = sitofp i32 %call28.4 to double + %tmp23.sroa.0.8.vec.insert.4 = insertelement <2 x double> %tmp23.sroa.0.0.vec.insert.4, double %conv29.4, i32 1 + store <2 x double> %tmp23.sroa.0.8.vec.insert.4, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 4), align 16, !tbaa !8 + %call25.5 = tail call i32 @rand() + %conv26.5 = sitofp i32 %call25.5 to double + %tmp23.sroa.0.0.vec.insert.5 = insertelement <2 x double> undef, double %conv26.5, i32 0 + %call28.5 = tail call i32 @rand() + %conv29.5 = sitofp i32 %call28.5 to double + %tmp23.sroa.0.8.vec.insert.5 = insertelement <2 x double> %tmp23.sroa.0.0.vec.insert.5, double %conv29.5, i32 1 + store <2 x double> %tmp23.sroa.0.8.vec.insert.5, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 5), align 16, !tbaa !8 + %call25.6 = tail call i32 @rand() + %conv26.6 = sitofp i32 %call25.6 to double + %tmp23.sroa.0.0.vec.insert.6 = insertelement <2 x double> undef, double %conv26.6, i32 0 + %call28.6 = tail call i32 @rand() + %conv29.6 = sitofp i32 %call28.6 to double + %tmp23.sroa.0.8.vec.insert.6 = insertelement <2 x double> %tmp23.sroa.0.0.vec.insert.6, double %conv29.6, i32 1 + store <2 x double> %tmp23.sroa.0.8.vec.insert.6, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 6), align 16, !tbaa !8 + %call25.7 = tail call i32 @rand() + %conv26.7 = sitofp i32 %call25.7 to double + %tmp23.sroa.0.0.vec.insert.7 = insertelement <2 x double> undef, double %conv26.7, i32 0 + %call28.7 = tail call i32 @rand() + %conv29.7 = sitofp i32 %call28.7 to double + %tmp23.sroa.0.8.vec.insert.7 = insertelement <2 x double> %tmp23.sroa.0.0.vec.insert.7, double %conv29.7, i32 1 + store <2 x double> %tmp23.sroa.0.8.vec.insert.7, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 7), align 16, !tbaa !8 + ret void +} + +; Function Attrs: norecurse nounwind +define void @loop() local_unnamed_addr #2 { +entry: + %0 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 0), align 16, !tbaa !8 + %1 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 1), align 16, !tbaa !8 + %2 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 2), align 16, !tbaa !8 + %3 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 3), align 16, !tbaa !8 + %4 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 4), align 16, !tbaa !8 + %5 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 5), align 16, !tbaa !8 + %6 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 6), align 16, !tbaa !8 + %7 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 7), align 16, !tbaa !8 + %.promoted = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 0), align 16, !tbaa !8 + %.promoted51 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 1), align 16, !tbaa !8 + %.promoted53 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 2), align 16, !tbaa !8 + %.promoted55 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 3), align 16, !tbaa !8 + %.promoted57 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 4), align 16, !tbaa !8 + %.promoted59 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 5), align 16, !tbaa !8 + %.promoted61 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 6), align 16, !tbaa !8 + %.promoted63 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 7), align 16, !tbaa !8 + br label %for.body + +for.cond.cleanup: ; preds = %for.body + store <2 x double> %add.i48, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 0), align 16, !tbaa !8 + store <2 x double> %add.i46, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 1), align 16, !tbaa !8 + store <2 x double> %add.i44, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 2), align 16, !tbaa !8 + store <2 x double> %add.i42, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 3), align 16, !tbaa !8 + store <2 x double> %add.i40, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 4), align 16, !tbaa !8 + store <2 x double> %add.i38, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 5), align 16, !tbaa !8 + store <2 x double> %add.i36, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 6), align 16, !tbaa !8 + store <2 x double> %add.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 7), align 16, !tbaa !8 + ret void + +for.body: ; preds = %for.body, %entry + %add.i64 = phi <2 x double> [ %.promoted63, %entry ], [ %add.i, %for.body ] + %add.i3662 = phi <2 x double> [ %.promoted61, %entry ], [ %add.i36, %for.body ] + %add.i3860 = phi <2 x double> [ %.promoted59, %entry ], [ %add.i38, %for.body ] + %add.i4058 = phi <2 x double> [ %.promoted57, %entry ], [ %add.i40, %for.body ] + %add.i4256 = phi <2 x double> [ %.promoted55, %entry ], [ %add.i42, %for.body ] + %add.i4454 = phi <2 x double> [ %.promoted53, %entry ], [ %add.i44, %for.body ] + %add.i4652 = phi <2 x double> [ %.promoted51, %entry ], [ %add.i46, %for.body ] + %add.i4850 = phi <2 x double> [ %.promoted, %entry ], [ %add.i48, %for.body ] + %i.049 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds [76800000 x <2 x double>], [76800000 x <2 x double>]* @"\01?m1@@3PAU__m128d@@A", i32 0, i32 %i.049 + %8 = load <2 x double>, <2 x double>* %arrayidx, align 16, !tbaa !8 + %mul.i = fmul <2 x double> %8, %0 + %add.i48 = fadd <2 x double> %add.i4850, %mul.i + %mul.i47 = fmul <2 x double> %8, %1 + %add.i46 = fadd <2 x double> %add.i4652, %mul.i47 + %mul.i45 = fmul <2 x double> %8, %2 + %add.i44 = fadd <2 x double> %add.i4454, %mul.i45 + %mul.i43 = fmul <2 x double> %8, %3 + %add.i42 = fadd <2 x double> %add.i4256, %mul.i43 + %mul.i41 = fmul <2 x double> %8, %4 + %add.i40 = fadd <2 x double> %add.i4058, %mul.i41 + %mul.i39 = fmul <2 x double> %8, %5 + %add.i38 = fadd <2 x double> %add.i3860, %mul.i39 + %mul.i37 = fmul <2 x double> %8, %6 + %add.i36 = fsub <2 x double> %add.i3662, %mul.i37 + %mul.i35 = fmul <2 x double> %8, %7 + %add.i = fadd <2 x double> %add.i64, %mul.i35 + %inc = add nuw nsw i32 %i.049, 1 + %exitcond = icmp eq i32 %inc, 76800000 + br i1 %exitcond, label %for.cond.cleanup, label %for.body +} + +; Function Attrs: nounwind +define void @"\01?dump@@YAXXZ"() local_unnamed_addr #3 { +entry: + %call = tail call %struct._iobuf* @fopen(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @"\01??_C@_09LAIDGMDM@?1dev?1null?$AA@", i32 0, i32 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @"\01??_C@_01NOFIACDB@w?$AA@", i32 0, i32 0)) + %call1 = tail call i32 @fwrite(i8* bitcast ([8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A" to i8*), i32 16, i32 8, %struct._iobuf* %call) + %call2 = tail call i32 @fclose(%struct._iobuf* %call) + ret void +} + +declare void @srand(i32) local_unnamed_addr #4 + +declare i32 @rand() local_unnamed_addr #4 + +; Function Attrs: nounwind +declare noalias %struct._iobuf* @fopen(i8* nocapture readonly, i8* nocapture readonly) local_unnamed_addr #5 + +; Function Attrs: nounwind +declare i32 @fwrite(i8* nocapture, i32, i32, %struct._iobuf* nocapture) local_unnamed_addr #5 + +; Function Attrs: nounwind +declare i32 @fclose(%struct._iobuf* nocapture) local_unnamed_addr #5 + +declare i64 @_time64(i64*) local_unnamed_addr #4 + +; Function Attrs: argmemonly nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i32, i1) #6 + +attributes #0 = { norecurse "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #4 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #5 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #6 = { argmemonly nounwind } +attributes #7 = { nounwind } + +!llvm.linker.options = !{!0, !1, !2, !3, !4} +!llvm.module.flags = !{!5, !6} +!llvm.ident = !{!7} + +!0 = !{!"/FAILIFMISMATCH:\22_MSC_VER=1900\22"} +!1 = !{!"/FAILIFMISMATCH:\22_ITERATOR_DEBUG_LEVEL=0\22"} +!2 = !{!"/FAILIFMISMATCH:\22RuntimeLibrary=MT_StaticRelease\22"} +!3 = !{!"/DEFAULTLIB:libcpmt.lib"} +!4 = !{!"/FAILIFMISMATCH:\22_CRT_STDIO_ISO_WIDE_SPECIFIERS=0\22"} +!5 = !{i32 1, !"NumRegisterParameters", i32 0} +!6 = !{i32 1, !"wchar_size", i32 2} +!7 = !{!"clang version 5.0.0 (cfe/trunk 305640)"} +!8 = !{!9, !9, i64 0} +!9 = !{!"omnipotent char", !10, i64 0} +!10 = !{!"Simple C++ TBAA"} Index: llvm/trunk/test/CodeGen/X86/greedy_regalloc_bad_eviction_sequence.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/greedy_regalloc_bad_eviction_sequence.ll +++ llvm/trunk/test/CodeGen/X86/greedy_regalloc_bad_eviction_sequence.ll @@ -0,0 +1,116 @@ +; RUN: llc < %s -march=x86 -regalloc=greedy -stop-after=greedy | FileCheck %s +; Make sure bad eviction sequence doesnt occur + +; Part of the fix for bugzilla 26810. +; This test is meant to make sure bad eviction sequence like the one described +; below does not occur +; +; movl %ebp, 8(%esp) # 4-byte Spill +; movl %ecx, %ebp +; movl %ebx, %ecx +; movl %edi, %ebx +; movl %edx, %edi +; cltd +; idivl %esi +; movl %edi, %edx +; movl %ebx, %edi +; movl %ecx, %ebx +; movl %ebp, %ecx +; movl 16(%esp), %ebp # 4 - byte Reload + +; Make sure we have no redundant copies in the problematic code seqtion +; CHECK-LABEL: name: bar +; CHECK: bb.3.for.body: +; CHECK: %eax = COPY +; CHECK-NEXT: CDQ +; CHECK-NEXT: IDIV32r +; CHECK-NEXT: ADD32rr + + +target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" +target triple = "i386-pc-linux-gnu" + + +; Function Attrs: norecurse nounwind readonly +define i32 @bar(i32 %size, i32* nocapture readonly %arr, i32* nocapture readnone %tmp) local_unnamed_addr #1 { +entry: + %0 = load i32, i32* %arr, align 4, !tbaa !3 + %arrayidx3 = getelementptr inbounds i32, i32* %arr, i32 1 + %1 = load i32, i32* %arrayidx3, align 4, !tbaa !3 + %arrayidx5 = getelementptr inbounds i32, i32* %arr, i32 2 + %2 = load i32, i32* %arrayidx5, align 4, !tbaa !3 + %arrayidx7 = getelementptr inbounds i32, i32* %arr, i32 3 + %3 = load i32, i32* %arrayidx7, align 4, !tbaa !3 + %arrayidx9 = getelementptr inbounds i32, i32* %arr, i32 4 + %4 = load i32, i32* %arrayidx9, align 4, !tbaa !3 + %arrayidx11 = getelementptr inbounds i32, i32* %arr, i32 5 + %5 = load i32, i32* %arrayidx11, align 4, !tbaa !3 + %arrayidx13 = getelementptr inbounds i32, i32* %arr, i32 6 + %6 = load i32, i32* %arrayidx13, align 4, !tbaa !3 + %arrayidx15 = getelementptr inbounds i32, i32* %arr, i32 7 + %7 = load i32, i32* %arrayidx15, align 4, !tbaa !3 + %arrayidx17 = getelementptr inbounds i32, i32* %arr, i32 8 + %8 = load i32, i32* %arrayidx17, align 4, !tbaa !3 + %cmp69 = icmp sgt i32 %size, 1 + br i1 %cmp69, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.body, %entry + %x0.0.lcssa = phi i32 [ %0, %entry ], [ %add, %for.body ] + %x1.0.lcssa = phi i32 [ %1, %entry ], [ %sub, %for.body ] + %x2.0.lcssa = phi i32 [ %2, %entry ], [ %mul, %for.body ] + %x3.0.lcssa = phi i32 [ %3, %entry ], [ %div, %for.body ] + %x4.0.lcssa = phi i32 [ %4, %entry ], [ %add19, %for.body ] + %x5.0.lcssa = phi i32 [ %5, %entry ], [ %sub20, %for.body ] + %x6.0.lcssa = phi i32 [ %6, %entry ], [ %add21, %for.body ] + %x7.0.lcssa = phi i32 [ %7, %entry ], [ %mul22, %for.body ] + %x8.0.lcssa = phi i32 [ %8, %entry ], [ %sub23, %for.body ] + %mul24 = mul nsw i32 %x1.0.lcssa, %x0.0.lcssa + %mul25 = mul nsw i32 %mul24, %x2.0.lcssa + %mul26 = mul nsw i32 %mul25, %x3.0.lcssa + %mul27 = mul nsw i32 %mul26, %x4.0.lcssa + %mul28 = mul nsw i32 %mul27, %x5.0.lcssa + %mul29 = mul nsw i32 %mul28, %x6.0.lcssa + %mul30 = mul nsw i32 %mul29, %x7.0.lcssa + %mul31 = mul nsw i32 %mul30, %x8.0.lcssa + ret i32 %mul31 + +for.body: ; preds = %entry, %for.body + %i.079 = phi i32 [ %inc, %for.body ], [ 1, %entry ] + %x8.078 = phi i32 [ %sub23, %for.body ], [ %8, %entry ] + %x7.077 = phi i32 [ %mul22, %for.body ], [ %7, %entry ] + %x6.076 = phi i32 [ %add21, %for.body ], [ %6, %entry ] + %x5.075 = phi i32 [ %sub20, %for.body ], [ %5, %entry ] + %x4.074 = phi i32 [ %add19, %for.body ], [ %4, %entry ] + %x3.073 = phi i32 [ %div, %for.body ], [ %3, %entry ] + %x2.072 = phi i32 [ %mul, %for.body ], [ %2, %entry ] + %x1.071 = phi i32 [ %sub, %for.body ], [ %1, %entry ] + %x0.070 = phi i32 [ %add, %for.body ], [ %0, %entry ] + %add = add nsw i32 %x1.071, %x0.070 + %sub = sub nsw i32 %x1.071, %x2.072 + %mul = mul nsw i32 %x3.073, %x2.072 + %div = sdiv i32 %x3.073, %x4.074 + %add19 = add nsw i32 %x5.075, %x4.074 + %sub20 = sub nsw i32 %x5.075, %x6.076 + %add21 = add nsw i32 %x7.077, %x6.076 + %mul22 = mul nsw i32 %x8.078, %x7.077 + %sub23 = sub nsw i32 %x8.078, %add + %inc = add nuw nsw i32 %i.079, 1 + %exitcond = icmp eq i32 %inc, %size + br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !7 +} + +attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"NumRegisterParameters", i32 0} +!1 = !{i32 1, !"wchar_size", i32 2} +!2 = !{!"clang version 5.0.0 (cfe/trunk 305640)"} +!3 = !{!4, !4, i64 0} +!4 = !{!"int", !5, i64 0} +!5 = !{!"omnipotent char", !6, i64 0} +!6 = !{!"Simple C/C++ TBAA"} +!7 = distinct !{!7, !8} +!8 = !{!"llvm.loop.unroll.disable"}