Index: llvm/docs/ProgrammersManual.rst =================================================================== --- llvm/docs/ProgrammersManual.rst +++ llvm/docs/ProgrammersManual.rst @@ -2341,11 +2341,11 @@ .. _ds_bit: -Bit storage containers (BitVector, SparseBitVector) ---------------------------------------------------- +Bit storage containers (BitVector, SparseBitVector, CoalescingBitVector) +------------------------------------------------------------------------ -Unlike the other containers, there are only two bit storage containers, and -choosing when to use each is relatively straightforward. +There are four bit storage containers, and choosing when to use each is +relatively straightforward. One additional option is ``std::vector``: we discourage its use for two reasons 1) the implementation in many common compilers (e.g. commonly @@ -2395,6 +2395,22 @@ on size) of the current bit is also O(1). As a general statement, testing/setting bits in a SparseBitVector is O(distance away from last set bit). +.. _dss_coalescingbitvector: + +CoalescingBitVector +^^^^^^^^^^^^^^^^^^^ + +The CoalescingBitVector container is similar in principle to a SparseBitVector, +but is optimized to represent large contiguous ranges of set bits compactly. It +does this by coalescing contiguous ranges of set bits into intervals. Searching +for a bit in a CoalescingBitVector is O(log(gaps between contiguous ranges)). + +CoalescingBitVector is a better choice than BitVector when gaps between ranges +of set bits are large. It's a better choice than SparseBitVector when find() +operations must have fast, predictable performance. However, it's not a good +choice for representing sets which have lots of very short ranges. E.g. the set +`{2*x : x \in [0, n)}` would be a pathological input. + .. _debugging: Debugging Index: llvm/include/llvm/ADT/CoalescingBitVector.h =================================================================== --- /dev/null +++ llvm/include/llvm/ADT/CoalescingBitVector.h @@ -0,0 +1,395 @@ +//===- llvm/ADT/CoalescingBitVector.h - A coalescing bitvector --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file A bitvector that uses an IntervalMap to coalesce adjacent elements +/// into intervals. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_COALESCINGBITVECTOR_H +#define LLVM_ADT_COALESCINGBITVECTOR_H + +#include "llvm/ADT/IntervalMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include + +namespace llvm { + +/// A bitvector that, under the hood, relies on an IntervalMap to coalesce +/// elements into intervals. Good for representing sets which predominantly +/// contain contiguous ranges. Bad for representing sets with lots of gaps +/// between elements. +/// +/// Compared to SparseBitVector, CoalescingBitVector offers more predictable +/// performance for non-sequential find() operations. +/// +/// \param IndexT - The type of the index into the bitvector. +/// \param N - The first N coalesced intervals of set bits are stored in-place +/// (in the initial heap allocation). +template class CoalescingBitVector { + static_assert(std::is_unsigned::value, + "Index must be an unsigned integer."); + + using ThisT = CoalescingBitVector; + + /// An interval map for closed integer ranges. The mapped values are unused. + using MapT = IntervalMap; + + using UnderlyingIterator = typename MapT::const_iterator; + + using IntervalT = std::pair; + +public: + using Allocator = typename MapT::Allocator; + + /// Construct by passing in a CoalescingBitVector::Allocator + /// reference. + CoalescingBitVector(Allocator &Alloc) + : Alloc(&Alloc), Intervals(std::make_unique(Alloc)) {} + + /// \name Copy/move constructors and assignment operators. + /// @{ + + CoalescingBitVector(const ThisT &RHS) + : Alloc(RHS.Alloc), Intervals(std::make_unique(*RHS.Alloc)) { + initFromRHS(RHS); + } + + ThisT &operator=(const ThisT &RHS) { + clear(); + initFromRHS(RHS); + return *this; + } + + CoalescingBitVector(ThisT &&RHS) + : Alloc(RHS.Alloc), Intervals(std::move(RHS.Intervals)) {} + + ThisT &operator=(ThisT &&RHS) { + Alloc = RHS.Alloc; + Intervals = std::move(RHS.Intervals); + return *this; + } + + /// @} + + /// Clear all the bits. + void clear() { Intervals->clear(); } + + /// Check whether no bits are set. + bool empty() const { return Intervals->empty(); } + + /// Count the number of set bits. + unsigned count() const { + unsigned Bits = 0; + for (auto It = Intervals->begin(), End = Intervals->end(); It != End; ++It) + Bits += 1 + It.stop() - It.start(); + return Bits; + } + + /// Set the bit at \p Index. + /// + /// This method does /not/ support setting a bit that has already been set, + /// for efficiency reasons. If possible, restructure your code to not set the + /// same bit multiple times, or use \ref test_and_set. + void set(IndexT Index) { insert(Index, Index); } + + /// Set the bits set in \p RHS. + /// + /// This method does /not/ support setting already-set bits, see \ref set + /// for the rationale. For a safe set union operation, use \ref operator|=. + void set(const ThisT &RHS) { initFromRHS(RHS); } + + /// Set the bits at \p Indices. Used for testing, primarily. + void set(std::initializer_list Indices) { + for (IndexT Index : Indices) + set(Index); + } + + /// Check whether the bit at \p Index is set. + bool test(IndexT Index) const { + const auto It = Intervals->find(Index); + if (It == Intervals->end()) + return false; + assert(It.stop() >= Index && "Interval must end after Index"); + return It.start() <= Index; + } + + /// Set the bit at \p Index. Supports setting an already-set bit. + void test_and_set(IndexT Index) { + if (!test(Index)) + set(Index); + } + + /// Reset the bit at \p Index. Supports resetting an already-unset bit. + void reset(IndexT Index) { + auto It = Intervals->find(Index); + if (It == Intervals->end()) + return; + + // Split the interval containing Index into up to two parts: one from + // [Start, Index-1] and another from [Index+1, Stop]. If Index is equal to + // either Start or Stop, we create one new interval. If Index is equal to + // both Start and Stop, we simply erase the existing interval. + IndexT Start = It.start(); + if (Index < Start) + // The index was not set. + return; + IndexT Stop = It.stop(); + assert(Index <= Stop && "Wrong interval for index"); + It.erase(); + if (Start < Index) + insert(Start, Index - 1); + if (Index < Stop) + insert(Index + 1, Stop); + } + + /// Set union. If \p RHS is guaranteed to not overlap with this, \ref set may + /// be a faster alternative. + void operator|=(const ThisT &RHS) { + // Get the overlaps between the two interval maps. + SmallVector Overlaps; + getOverlaps(RHS, Overlaps); + // Insert all intervals from RHS. If an interval from RHS would overlap + // with an existing interval, be careful to only insert the non-overlapping + // portions. + unsigned CurrentOlap = 0; + for (auto It = RHS.Intervals->begin(), End = RHS.Intervals->end(); + It != End; ++It) { + IndexT Start = It.start(); + IndexT Stop = It.stop(); + + // [Start;Stop] and [OlapStart;OlapStop] overlap iff OlapStart <= Stop + // and Start <= OlapStop. + if (CurrentOlap < Overlaps.size()) { + IndexT OlapStart, OlapStop; + std::tie(OlapStart, OlapStop) = Overlaps[CurrentOlap]; + if (OlapStart <= Stop && Start <= OlapStop) { + if (Start < OlapStart) + insert(Start, OlapStart - 1); + if (OlapStop < Stop) + insert(OlapStop + 1, Stop); + ++CurrentOlap; + continue; + } + } + + insert(Start, Stop); + } + } + + /// Set intersection. + void operator&=(const ThisT &RHS) { + // Get the overlaps between the two interval maps (i.e. the intersection). + SmallVector Overlaps; + getOverlaps(RHS, Overlaps); + // Rebuild the interval map, including only the overlaps. + clear(); + for (IntervalT Overlap : Overlaps) + insert(Overlap.first, Overlap.second); + } + + /// Reset all bits present in \p RHS. + void intersectWithComplement(const ThisT &RHS) { + SmallVector Overlaps; + if (!getOverlaps(RHS, Overlaps)) { + // If there is no overlap with RHS, the intersection is empty. + return; + } + + // Delete the overlapping intervals. Split up intervals that only partially + // intersect an overlap. + for (IntervalT Overlap : Overlaps) { + IndexT OlapStart, OlapStop; + std::tie(OlapStart, OlapStop) = Overlap; + + auto It = Intervals->find(OlapStart); + IndexT CurrStart = It.start(); + IndexT CurrStop = It.stop(); + assert(CurrStart <= OlapStart && OlapStop <= CurrStop && + "Expected some intersection!"); + + // Split the overlap interval into up to two parts: one from [CurrStart, + // OlapStart-1] and another from [OlapStop+1, CurrStop]. If OlapStart is + // equal to CurrStart, the first split interval is unnecessary. Ditto for + // when OlapStop is equal to CurrStop, we omit the second split interval. + It.erase(); + if (CurrStart < OlapStart) + insert(CurrStart, OlapStart - 1); + if (OlapStop < CurrStop) + insert(OlapStop + 1, CurrStop); + } + } + + bool operator==(const ThisT &RHS) const { + // We cannot just use std::equal because it checks the dereferenced values + // of an iterator pair for equality, not the iterators themselves. In our + // case that results in comparison of the (unused) IntervalMap values. + auto ItL = Intervals->begin(); + auto ItR = RHS.Intervals->begin(); + while (ItL != Intervals->end() && ItR != RHS.Intervals->end() && + ItL.start() == ItR.start() && ItL.stop() == ItR.stop()) { + ++ItL; + ++ItR; + } + return ItL == Intervals->end() && ItR == RHS.Intervals->end(); + } + + bool operator!=(const ThisT &RHS) const { return !operator==(RHS); } + + class const_iterator + : public std::iterator { + friend class CoalescingBitVector; + + // For performance reasons, make the offset at the end different than the + // one used in \ref begin, to optimize the common `It == end()` pattern. + static constexpr unsigned kIteratorAtTheEndOffset = ~0u; + + UnderlyingIterator MapIterator; + unsigned OffsetIntoMapIterator = 0; + + // Querying the start/stop of an IntervalMap iterator can be very expensive. + // Cache these values for performance reasons. + IndexT CachedStart = IndexT(); + IndexT CachedStop = IndexT(); + + void setToEnd() { + OffsetIntoMapIterator = kIteratorAtTheEndOffset; + CachedStart = IndexT(); + CachedStop = IndexT(); + } + + /// MapIterator has just changed, reset the cached state to point to the + /// start of the new underlying iterator. + void resetCache() { + if (MapIterator.valid()) { + OffsetIntoMapIterator = 0; + CachedStart = MapIterator.start(); + CachedStop = MapIterator.stop(); + } else { + setToEnd(); + } + } + + /// Advance the iterator to \p Index, if it is contained within the current + /// interval. + void advanceTo(IndexT Index) { + assert(OffsetIntoMapIterator == 0 && "Not implemented"); + assert(Index <= CachedStop && "Cannot advance to OOB index"); + if (Index < CachedStart) + // We're already past this index. + return; + OffsetIntoMapIterator = Index - CachedStart; + } + + const_iterator(UnderlyingIterator MapIt) : MapIterator(MapIt) { + resetCache(); + } + + public: + const_iterator() { setToEnd(); } + + bool operator==(const const_iterator &RHS) const { + // Do /not/ compare MapIterator for equality, as this is very expensive. + // The cached start/stop values make that check unnecessary. + return std::tie(OffsetIntoMapIterator, CachedStart, CachedStop) == + std::tie(RHS.OffsetIntoMapIterator, RHS.CachedStart, + RHS.CachedStop); + } + + bool operator!=(const const_iterator &RHS) const { + return !operator==(RHS); + } + + IndexT operator*() const { return CachedStart + OffsetIntoMapIterator; } + + const_iterator &operator++() { // Pre-increment (++It). + if (CachedStart + OffsetIntoMapIterator < CachedStop) { + // Keep going within the current interval. + ++OffsetIntoMapIterator; + } else { + // We reached the end of the current interval: advance. + ++MapIterator; + resetCache(); + } + return *this; + } + + const_iterator operator++(int) { // Post-increment (It++). + const_iterator tmp = *this; + operator++(); + return tmp; + } + }; + + const_iterator begin() const { return const_iterator(Intervals->begin()); } + + const_iterator end() const { return const_iterator(); } + + /// Return an iterator pointing to the first set bit AT, OR AFTER, \p Index. + /// If no such set bit exists, return end(). This is like std::lower_bound. + const_iterator find(IndexT Index) const { + auto UnderlyingIt = Intervals->find(Index); + if (UnderlyingIt == Intervals->end()) + return end(); + auto It = const_iterator(UnderlyingIt); + It.advanceTo(Index); + return It; + } + + void print(raw_ostream &OS) const { + OS << "{"; + for (auto It = Intervals->begin(), End = Intervals->end(); It != End; + ++It) { + OS << "[" << It.start(); + if (It.start() != It.stop()) + OS << ", " << It.stop(); + OS << "]"; + } + OS << "}"; + } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + LLVM_DUMP_METHOD void dump() const { + // LLDB swallows the first line of output after callling dump(). Add + // newlines before/after the braces to work around this. + dbgs() << "\n"; + print(dbgs()); + dbgs() << "\n"; + } +#endif + +private: + void initFromRHS(const ThisT &RHS) { + for (auto It = RHS.Intervals->begin(), End = RHS.Intervals->end(); + It != End; ++It) + insert(It.start(), It.stop()); + } + + void insert(IndexT Start, IndexT End) { Intervals->insert(Start, End, 0); } + + /// Record the overlaps between \p this and \p RHS in \p Overlaps. Return + /// true if there is any overlap. + bool getOverlaps(const ThisT &RHS, + SmallVectorImpl &Overlaps) const { + for (IntervalMapOverlaps I(*Intervals, *RHS.Intervals); + I.valid(); ++I) + Overlaps.emplace_back(I.start(), I.stop()); + return !Overlaps.empty(); + } + + Allocator *Alloc; + std::unique_ptr Intervals; +}; + +} // namespace llvm + +#endif // LLVM_ADT_COALESCINGBITVECTOR_H Index: llvm/unittests/ADT/CMakeLists.txt =================================================================== --- llvm/unittests/ADT/CMakeLists.txt +++ llvm/unittests/ADT/CMakeLists.txt @@ -12,6 +12,7 @@ BitVectorTest.cpp BreadthFirstIteratorTest.cpp BumpPtrListTest.cpp + CoalescingBitVectorTest.cpp DAGDeltaAlgorithmTest.cpp DeltaAlgorithmTest.cpp DenseMapTest.cpp Index: llvm/unittests/ADT/CoalescingBitVectorTest.cpp =================================================================== --- /dev/null +++ llvm/unittests/ADT/CoalescingBitVectorTest.cpp @@ -0,0 +1,453 @@ +//=== CoalescingBitVectorTest.cpp - CoalescingBitVector unit tests --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/CoalescingBitVector.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { + +using UBitVec = CoalescingBitVector; +using U64BitVec = CoalescingBitVector; + +bool elementsMatch(const UBitVec &BV, std::initializer_list List) { + if (!std::equal(BV.begin(), BV.end(), List.begin(), List.end())) { + UBitVec::Allocator Alloc; + UBitVec Expected(Alloc); + Expected.set(List); + dbgs() << "elementsMatch:\n" + << " Expected: "; + Expected.print(dbgs()); + dbgs() << " Got: "; + BV.print(dbgs()); + return false; + } + return true; +} + +TEST(CoalescingBitVectorTest, Set) { + UBitVec::Allocator Alloc; + UBitVec BV1(Alloc); + UBitVec BV2(Alloc); + + BV1.set(0); + EXPECT_TRUE(BV1.test(0)); + EXPECT_FALSE(BV1.test(1)); + + BV2.set(BV1); + EXPECT_TRUE(BV2.test(0)); +} + +TEST(CoalescingBitVectorTest, Count) { + UBitVec::Allocator Alloc; + UBitVec BV(Alloc); + EXPECT_EQ(BV.count(), 0u); + BV.set(0); + EXPECT_EQ(BV.count(), 1u); + BV.set({11, 12, 13, 14, 15}); + EXPECT_EQ(BV.count(), 6u); +} + +TEST(CoalescingBitVectorTest, ClearAndEmpty) { + UBitVec::Allocator Alloc; + UBitVec BV(Alloc); + EXPECT_TRUE(BV.empty()); + BV.set(1); + EXPECT_FALSE(BV.empty()); + BV.clear(); + EXPECT_TRUE(BV.empty()); +} + +TEST(CoalescingBitVector, Copy) { + UBitVec::Allocator Alloc; + UBitVec BV1(Alloc); + BV1.set(0); + UBitVec BV2 = BV1; + EXPECT_TRUE(elementsMatch(BV1, {0})); + EXPECT_TRUE(elementsMatch(BV2, {0})); + BV2.set(5); + BV2 = BV1; + EXPECT_TRUE(elementsMatch(BV1, {0})); + EXPECT_TRUE(elementsMatch(BV2, {0})); +} + +TEST(CoalescingBitVector, Move) { + UBitVec::Allocator Alloc; + UBitVec BV1(Alloc); + BV1.set(0); + UBitVec BV2 = std::move(BV1); + EXPECT_TRUE(elementsMatch(BV2, {0})); + BV2.set(5); + BV1 = std::move(BV2); + EXPECT_TRUE(elementsMatch(BV1, {0, 5})); +} + +TEST(CoalescingBitVectorTest, Iterators) { + UBitVec::Allocator Alloc; + UBitVec BV(Alloc); + + BV.set({0, 1, 2}); + + auto It = BV.begin(); + EXPECT_EQ(It, BV.begin()); + EXPECT_EQ(*It, 0u); + ++It; + EXPECT_EQ(*It, 1u); + ++It; + EXPECT_EQ(*It, 2u); + ++It; + EXPECT_EQ(It, BV.end()); + EXPECT_EQ(BV.end(), BV.end()); + + It = BV.begin(); + EXPECT_EQ(It, BV.begin()); + auto ItCopy = It++; + EXPECT_EQ(ItCopy, BV.begin()); + EXPECT_EQ(*ItCopy, 0u); + EXPECT_EQ(*It, 1u); + + EXPECT_TRUE(elementsMatch(BV, {0, 1, 2})); + + BV.set({4, 5, 6}); + EXPECT_TRUE(elementsMatch(BV, {0, 1, 2, 4, 5, 6})); + + BV.set(3); + EXPECT_TRUE(elementsMatch(BV, {0, 1, 2, 3, 4, 5, 6})); + + BV.set(10); + EXPECT_TRUE(elementsMatch(BV, {0, 1, 2, 3, 4, 5, 6, 10})); + + // Should be able to reset unset bits. + BV.reset(3); + BV.reset(3); + BV.reset(20000); + BV.set({1000, 1001, 1002}); + EXPECT_TRUE(elementsMatch(BV, {0, 1, 2, 4, 5, 6, 10, 1000, 1001, 1002})); + + auto It1 = BV.begin(); + EXPECT_EQ(It1, BV.begin()); + EXPECT_EQ(++It1, ++BV.begin()); + EXPECT_NE(It1, BV.begin()); + EXPECT_NE(It1, BV.end()); +} + +TEST(CoalescingBitVectorTest, Reset) { + UBitVec::Allocator Alloc; + UBitVec BV(Alloc); + + BV.set(0); + EXPECT_TRUE(BV.test(0)); + BV.reset(0); + EXPECT_FALSE(BV.test(0)); + + BV.clear(); + BV.set({1, 2, 3}); + BV.reset(1); + EXPECT_TRUE(elementsMatch(BV, {2, 3})); + + BV.clear(); + BV.set({1, 2, 3}); + BV.reset(2); + EXPECT_TRUE(elementsMatch(BV, {1, 3})); + + BV.clear(); + BV.set({1, 2, 3}); + BV.reset(3); + EXPECT_TRUE(elementsMatch(BV, {1, 2})); +} + +TEST(CoalescingBitVectorTest, Comparison) { + UBitVec::Allocator Alloc; + UBitVec BV1(Alloc); + UBitVec BV2(Alloc); + + // Single interval. + BV1.set({1, 2, 3}); + BV2.set({1, 2, 3}); + EXPECT_EQ(BV1, BV2); + EXPECT_FALSE(BV1 != BV2); + + // Different number of intervals. + BV1.clear(); + BV2.clear(); + BV1.set({1, 2, 3}); + EXPECT_NE(BV1, BV2); + + // Multiple intervals. + BV1.clear(); + BV2.clear(); + BV1.set({1, 2, 11, 12}); + BV2.set({1, 2, 11, 12}); + EXPECT_EQ(BV1, BV2); + BV2.reset(1); + EXPECT_NE(BV1, BV2); + BV2.set(1); + BV2.reset(11); + EXPECT_NE(BV1, BV2); +} + +// A simple implementation of set union, used to double-check the human +// "expected" answer. +UBitVec simpleUnion(UBitVec::Allocator &Alloc, const UBitVec &LHS, + const UBitVec &RHS) { + UBitVec Union(Alloc); + for (unsigned Bit : LHS) + Union.test_and_set(Bit); + for (unsigned Bit : RHS) + Union.test_and_set(Bit); + return Union; +} + +TEST(CoalescingBitVectorTest, Union) { + UBitVec::Allocator Alloc; + + auto unionIs = [&](std::initializer_list LHS, + std::initializer_list RHS, + std::initializer_list Expected) { + UBitVec BV1(Alloc); + BV1.set(LHS); + UBitVec BV2(Alloc); + BV2.set(RHS); + const UBitVec &DoubleCheckedExpected = simpleUnion(Alloc, BV1, BV2); + ASSERT_TRUE(elementsMatch(DoubleCheckedExpected, Expected)); + BV1 |= BV2; + ASSERT_TRUE(elementsMatch(BV1, Expected)); + }; + + // Empty LHS. + unionIs({}, {1, 2, 3}, {1, 2, 3}); + + // Empty RHS. + unionIs({1, 2, 3}, {}, {1, 2, 3}); + + // Full overlap. + unionIs({1}, {1}, {1}); + unionIs({1, 2, 11, 12}, {1, 2, 11, 12}, {1, 2, 11, 12}); + + // Sliding window: fix {2, 3, 4} as the LHS, and slide a window before/after + // it. Repeat this swapping LHS and RHS. + unionIs({2, 3, 4}, {1, 2, 3}, {1, 2, 3, 4}); + unionIs({2, 3, 4}, {2, 3, 4}, {2, 3, 4}); + unionIs({2, 3, 4}, {3, 4, 5}, {2, 3, 4, 5}); + unionIs({1, 2, 3}, {2, 3, 4}, {1, 2, 3, 4}); + unionIs({3, 4, 5}, {2, 3, 4}, {2, 3, 4, 5}); + + // Multiple overlaps, but at least one of the overlaps forces us to split an + // interval (and possibly both do). For ease of understanding, fix LHS to be + // {1, 2, 11, 12}, but vary RHS. + unionIs({1, 2, 11, 12}, {1}, {1, 2, 11, 12}); + unionIs({1, 2, 11, 12}, {2}, {1, 2, 11, 12}); + unionIs({1, 2, 11, 12}, {11}, {1, 2, 11, 12}); + unionIs({1, 2, 11, 12}, {12}, {1, 2, 11, 12}); + unionIs({1, 2, 11, 12}, {1, 11}, {1, 2, 11, 12}); + unionIs({1, 2, 11, 12}, {1, 12}, {1, 2, 11, 12}); + unionIs({1, 2, 11, 12}, {2, 11}, {1, 2, 11, 12}); + unionIs({1, 2, 11, 12}, {2, 12}, {1, 2, 11, 12}); + unionIs({1, 2, 11, 12}, {1, 2, 11}, {1, 2, 11, 12}); + unionIs({1, 2, 11, 12}, {1, 2, 12}, {1, 2, 11, 12}); + unionIs({1, 2, 11, 12}, {1, 11, 12}, {1, 2, 11, 12}); + unionIs({1, 2, 11, 12}, {2, 11, 12}, {1, 2, 11, 12}); + unionIs({1, 2, 11, 12}, {0, 11, 12}, {0, 1, 2, 11, 12}); + unionIs({1, 2, 11, 12}, {3, 11, 12}, {1, 2, 3, 11, 12}); + unionIs({1, 2, 11, 12}, {1, 11, 13}, {1, 2, 11, 12, 13}); + unionIs({1, 2, 11, 12}, {1, 10, 11}, {1, 2, 10, 11, 12}); + + // Partial overlap, but the existing interval covers future overlaps. + unionIs({1, 2, 3, 4, 5, 6, 7, 8}, {2, 3, 4, 6, 7}, {1, 2, 3, 4, 5, 6, 7, 8}); +} + +// A simple implementation of set intersection, used to double-check the +// human "expected" answer. +UBitVec simpleIntersection(UBitVec::Allocator &Alloc, const UBitVec &LHS, + const UBitVec &RHS) { + UBitVec Intersection(Alloc); + for (unsigned Bit : LHS) + if (RHS.test(Bit)) + Intersection.set(Bit); + return Intersection; +} + +TEST(CoalescingBitVectorTest, Intersection) { + UBitVec::Allocator Alloc; + + // Check that after doing LHS &= RHS, LHS == Expected. + auto intersectionIs = [&](std::initializer_list LHS, + std::initializer_list RHS, + std::initializer_list Expected) { + UBitVec BV1(Alloc); + BV1.set(LHS); + UBitVec BV2(Alloc); + BV2.set(RHS); + const UBitVec &DoubleCheckedExpected = simpleIntersection(Alloc, BV1, BV2); + ASSERT_TRUE(elementsMatch(DoubleCheckedExpected, Expected)); + BV1 &= BV2; + ASSERT_TRUE(elementsMatch(BV1, Expected)); + }; + + // Empty case, one-element case. + intersectionIs({}, {}, {}); + intersectionIs({1}, {1}, {1}); + intersectionIs({1}, {2}, {}); + + // Exact overlaps cases: single overlap and multiple overlaps. + intersectionIs({1, 2}, {1, 2}, {1, 2}); + intersectionIs({1, 2, 11, 12}, {1, 2, 11, 12}, {1, 2, 11, 12}); + + // Sliding window: fix {2, 3, 4} as the LHS, and slide a window before/after + // it. Repeat this swapping LHS and RHS. + intersectionIs({2, 3, 4}, {1, 2, 3}, {2, 3}); + intersectionIs({2, 3, 4}, {2, 3, 4}, {2, 3, 4}); + intersectionIs({2, 3, 4}, {3, 4, 5}, {3, 4}); + intersectionIs({1, 2, 3}, {2, 3, 4}, {2, 3}); + intersectionIs({3, 4, 5}, {2, 3, 4}, {3, 4}); + + // No overlap, but we have multiple intervals. + intersectionIs({1, 2, 11, 12}, {3, 4, 13, 14}, {}); + + // Multiple overlaps, but at least one of the overlaps forces us to split an + // interval (and possibly both do). For ease of understanding, fix LHS to be + // {1, 2, 11, 12}, but vary RHS. + intersectionIs({1, 2, 11, 12}, {1}, {1}); + intersectionIs({1, 2, 11, 12}, {2}, {2}); + intersectionIs({1, 2, 11, 12}, {11}, {11}); + intersectionIs({1, 2, 11, 12}, {12}, {12}); + intersectionIs({1, 2, 11, 12}, {1, 11}, {1, 11}); + intersectionIs({1, 2, 11, 12}, {1, 12}, {1, 12}); + intersectionIs({1, 2, 11, 12}, {2, 11}, {2, 11}); + intersectionIs({1, 2, 11, 12}, {2, 12}, {2, 12}); + intersectionIs({1, 2, 11, 12}, {1, 2, 11}, {1, 2, 11}); + intersectionIs({1, 2, 11, 12}, {1, 2, 12}, {1, 2, 12}); + intersectionIs({1, 2, 11, 12}, {1, 11, 12}, {1, 11, 12}); + intersectionIs({1, 2, 11, 12}, {2, 11, 12}, {2, 11, 12}); + intersectionIs({1, 2, 11, 12}, {0, 11, 12}, {11, 12}); + intersectionIs({1, 2, 11, 12}, {3, 11, 12}, {11, 12}); + intersectionIs({1, 2, 11, 12}, {1, 11, 13}, {1, 11}); + intersectionIs({1, 2, 11, 12}, {1, 10, 11}, {1, 11}); + + // Partial overlap, but the existing interval covers future overlaps. + intersectionIs({1, 2, 3, 4, 5, 6, 7, 8}, {2, 3, 4, 6, 7}, {2, 3, 4, 6, 7}); +} + +// A simple implementation of set intersection-with-complement, used to +// double-check the human "expected" answer. +UBitVec simpleIntersectionWithComplement(UBitVec::Allocator &Alloc, + const UBitVec &LHS, + const UBitVec &RHS) { + UBitVec Intersection(Alloc); + for (unsigned Bit : LHS) + if (!RHS.test(Bit)) + Intersection.set(Bit); + return Intersection; +} + +TEST(CoalescingBitVectorTest, IntersectWithComplement) { + UBitVec::Allocator Alloc; + + // Check that after doing LHS.intersectWithComplement(RHS), LHS == Expected. + auto intersectionWithComplementIs = + [&](std::initializer_list LHS, + std::initializer_list RHS, + std::initializer_list Expected) { + UBitVec BV1(Alloc); + BV1.set(LHS); + UBitVec BV2(Alloc); + BV2.set(RHS); + const UBitVec &DoubleCheckedExpected = + simpleIntersectionWithComplement(Alloc, BV1, BV2); + ASSERT_TRUE(elementsMatch(DoubleCheckedExpected, Expected)); + BV1.intersectWithComplement(BV2); + ASSERT_TRUE(elementsMatch(BV1, Expected)); + }; + + // Empty case, one-element case. + intersectionWithComplementIs({}, {}, {}); + intersectionWithComplementIs({1}, {1}, {}); + intersectionWithComplementIs({1}, {2}, {1}); + + // Exact overlaps cases: single overlap and multiple overlaps. + intersectionWithComplementIs({1, 2}, {1, 2}, {}); + intersectionWithComplementIs({1, 2, 11, 12}, {1, 2, 11, 12}, {}); + + // Sliding window: fix {2, 3, 4} as the LHS, and slide a window before/after + // it. Repeat this swapping LHS and RHS. + intersectionWithComplementIs({2, 3, 4}, {1, 2, 3}, {4}); + intersectionWithComplementIs({2, 3, 4}, {2, 3, 4}, {}); + intersectionWithComplementIs({2, 3, 4}, {3, 4, 5}, {2}); + intersectionWithComplementIs({1, 2, 3}, {2, 3, 4}, {1}); + intersectionWithComplementIs({3, 4, 5}, {2, 3, 4}, {5}); + + // No overlap, but we have multiple intervals. + intersectionWithComplementIs({1, 2, 11, 12}, {3, 4, 13, 14}, {1, 2, 11, 12}); + + // Multiple overlaps. For ease of understanding, fix LHS to be + // {1, 2, 11, 12}, but vary RHS. + intersectionWithComplementIs({1, 2, 11, 12}, {1}, {2, 11, 12}); + intersectionWithComplementIs({1, 2, 11, 12}, {2}, {1, 11, 12}); + intersectionWithComplementIs({1, 2, 11, 12}, {11}, {1, 2, 12}); + intersectionWithComplementIs({1, 2, 11, 12}, {12}, {1, 2, 11}); + intersectionWithComplementIs({1, 2, 11, 12}, {1, 11}, {2, 12}); + intersectionWithComplementIs({1, 2, 11, 12}, {1, 12}, {2, 11}); + intersectionWithComplementIs({1, 2, 11, 12}, {2, 11}, {1, 12}); + intersectionWithComplementIs({1, 2, 11, 12}, {2, 12}, {1, 11}); + intersectionWithComplementIs({1, 2, 11, 12}, {1, 2, 11}, {12}); + intersectionWithComplementIs({1, 2, 11, 12}, {1, 2, 12}, {11}); + intersectionWithComplementIs({1, 2, 11, 12}, {1, 11, 12}, {2}); + intersectionWithComplementIs({1, 2, 11, 12}, {2, 11, 12}, {1}); + intersectionWithComplementIs({1, 2, 11, 12}, {0, 11, 12}, {1, 2}); + intersectionWithComplementIs({1, 2, 11, 12}, {3, 11, 12}, {1, 2}); + intersectionWithComplementIs({1, 2, 11, 12}, {1, 11, 13}, {2, 12}); + intersectionWithComplementIs({1, 2, 11, 12}, {1, 10, 11}, {2, 12}); + + // Partial overlap, but the existing interval covers future overlaps. + intersectionWithComplementIs({1, 2, 3, 4, 5, 6, 7, 8}, {2, 3, 4, 6, 7}, + {1, 5, 8}); +} + +TEST(CoalescingBitVectorTest, FindLowerBound) { + U64BitVec::Allocator Alloc; + U64BitVec BV(Alloc); + uint64_t BigNum1 = uint64_t(1) << 32; + uint64_t BigNum2 = (uint64_t(1) << 33) + 1; + EXPECT_EQ(BV.find(BigNum1), BV.end()); + BV.set(BigNum1); + auto Find1 = BV.find(BigNum1); + EXPECT_EQ(*Find1, BigNum1); + BV.set(BigNum2); + auto Find2 = BV.find(BigNum1); + EXPECT_EQ(*Find2, BigNum1); + auto Find3 = BV.find(BigNum2); + EXPECT_EQ(*Find3, BigNum2); + BV.reset(BigNum1); + auto Find4 = BV.find(BigNum1); + EXPECT_EQ(*Find4, BigNum2); + + BV.clear(); + BV.set({1, 2, 3}); + EXPECT_EQ(*BV.find(2), 2u); + EXPECT_EQ(*BV.find(3), 3u); +} + +TEST(CoalescingBitVectorTest, Print) { + std::string S; + { + raw_string_ostream OS(S); + UBitVec::Allocator Alloc; + UBitVec BV(Alloc); + BV.set({1}); + BV.print(OS); + + BV.clear(); + BV.set({1, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}); + BV.print(OS); + } + EXPECT_EQ(S, "{[1]}" + "{[1][11, 20]}"); +} + +} // namespace Index: llvm/unittests/ADT/IntervalMapTest.cpp =================================================================== --- llvm/unittests/ADT/IntervalMapTest.cpp +++ llvm/unittests/ADT/IntervalMapTest.cpp @@ -51,6 +51,16 @@ EXPECT_TRUE(I2 == CI); } +// Test one-element closed ranges. +TEST(IntervalMapTest, OneElementRanges) { + UUMap::Allocator allocator; + UUMap map(allocator); + map.insert(1, 1, 1); + map.insert(2, 2, 2); + EXPECT_EQ(1u, map.lookup(1)); + EXPECT_EQ(2u, map.lookup(2)); +} + // Single entry map tests TEST(IntervalMapTest, SingleEntryMap) { UUMap::Allocator allocator;