Index: lib/Transforms/Scalar/MemCpyOptimizer.cpp =================================================================== --- lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -164,8 +164,8 @@ // If any of the stores are a memset, then it is always good to extend the // memset. - for (unsigned i = 0, e = TheStores.size(); i != e; ++i) - if (!isa(TheStores[i])) + for (auto it : TheStores) + if (!isa(*it)) return true; // Assume that the code generator is capable of merging pairs of stores @@ -235,73 +235,37 @@ void addRange(int64_t Start, int64_t Size, Value *Ptr, unsigned Alignment, Instruction *Inst); + void mergeRanges(); }; } // end anon namespace - -/// addRange - Add a new store to the MemsetRanges data structure. This adds a -/// new range for the specified store at the specified offset, merging into -/// existing ranges as appropriate. -/// -/// Do a linear search of the ranges to see if this can be joined and/or to -/// find the insertion point in the list. We keep the ranges sorted for -/// simplicity here. This is a linear search of a linked list, which is ugly, -/// however the number of ranges is limited, so this won't get crazy slow. void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr, unsigned Alignment, Instruction *Inst) { int64_t End = Start+Size; - range_iterator I = Ranges.begin(), E = Ranges.end(); - - while (I != E && Start > I->End) - ++I; - - // We now know that I == E, in which case we didn't find anything to merge - // with, or that Start <= I->End. If End < I->Start or I == E, then we need - // to insert a new range. Handle this now. - if (I == E || End < I->Start) { - MemsetRange &R = *Ranges.insert(I, MemsetRange()); - R.Start = Start; - R.End = End; - R.StartPtr = Ptr; - R.Alignment = Alignment; - R.TheStores.push_back(Inst); - return; - } - - // This store overlaps with I, add it. - I->TheStores.push_back(Inst); - - // At this point, we may have an interval that completely contains our store. - // If so, just add it to the interval and return. - if (I->Start <= Start && I->End >= End) - return; - - // Now we know that Start <= I->End and End >= I->Start so the range overlaps - // but is not entirely contained within the range. - - // See if the range extends the start of the range. In this case, it couldn't - // possibly cause it to join the prior range, because otherwise we would have - // stopped on *it*. - if (Start < I->Start) { - I->Start = Start; - I->StartPtr = Ptr; - I->Alignment = Alignment; - } + Ranges.push_back(MemsetRange { Start, End, Ptr, Alignment, { Inst } }); +} - // Now we know that Start <= I->End and Start >= I->Start (so the startpoint - // is in or right at the end of I), and that End >= I->Start. Extend I out to - // End. - if (End > I->End) { - I->End = End; - range_iterator NextI = I; - while (++NextI != E && End >= NextI->Start) { - // Merge the range in. - I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end()); - if (NextI->End > I->End) - I->End = NextI->End; - Ranges.erase(NextI); - NextI = I; +void MemsetRanges::mergeRanges() { + // Sort ranges by their starting address. + Ranges.sort([](const MemsetRange &lhs, const MemsetRange &rhs) { + return lhs.Start < rhs.Start; + }); + + // Merge overlapping ranges. + range_iterator I = Ranges.begin(); + range_iterator E = Ranges.end(); + range_iterator HEAD = I++; + + while (I != E) { + if (I->Start > HEAD->End) { + // New interval, don't merge. + HEAD = I++; + } else { + // Interval overlaps with head, merge it. + HEAD->End = std::max(HEAD->End, I->End); + HEAD->TheStores.append(I->TheStores.begin(), I->TheStores.end()); + Ranges.erase(I++); } } } @@ -436,6 +400,9 @@ // interesting as a small compile-time optimization. Ranges.addInst(0, StartInst); + // Merge overlapping ranges. + Ranges.mergeRanges(); + // If we create any memsets, we put it right before the first instruction that // isn't part of the memset block. This ensure that the memset is dominated // by any addressing instruction needed by the start of the block. @@ -470,8 +437,8 @@ Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment); DEBUG(dbgs() << "Replace stores:\n"; - for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i) - dbgs() << *Range.TheStores[i] << '\n'; + for (auto it : Range.TheStores) + dbgs() << *it << '\n'; dbgs() << "With: " << *AMemSet << '\n'); if (!Range.TheStores.empty())