diff --git a/llvm/include/llvm/ADT/FoldingSet.h b/llvm/include/llvm/ADT/FoldingSet.h --- a/llvm/include/llvm/ADT/FoldingSet.h +++ b/llvm/include/llvm/ADT/FoldingSet.h @@ -371,6 +371,8 @@ return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash(); } + inline unsigned size() const { return Bits.size(); } + /// operator== - Used to compare two nodes to each other. bool operator==(const FoldingSetNodeID &RHS) const; bool operator==(const FoldingSetNodeIDRef RHS) const; @@ -387,6 +389,103 @@ /// given allocator and return a FoldingSetNodeIDRef describing the /// interned data. FoldingSetNodeIDRef Intern(BumpPtrAllocator &Allocator) const; + + friend class FoldingSetNodeIDMatcher; +}; + +/// FoldingSetNodeIDMatcher - Tracks progress in matching a FoldingSetNodeID. +/// The constructor is given the ID to match and then Match* methods attempt +/// to match against this ID incrementally. This class eases short +/// circuiting of ID matching. +class FoldingSetNodeIDMatcher { + const FoldingSetNodeID &IDToMatch; + FoldingSetNodeID TempID; + unsigned I = 0; // Next profile index to match. + +public: + FoldingSetNodeIDMatcher(const FoldingSetNodeID &IDToMatch) + : IDToMatch{IDToMatch} {} + + /// Completed - Check that the full ID has been matched. Call this after + /// successfuly matching all components of the profile to ensure that + /// the ID to be matched has nothing remaining. + bool Completed() { return I == IDToMatch.size(); } + + /// getID - Provides direct access to the temporary ID being used for the + /// next incremental match. Load a profile into this ID and then call + /// Match to check it. + FoldingSetNodeID &getID() { + TempID.clear(); + return TempID; + } + + /// Match - Tests the contents of the temporary ID against the ID to match. + bool Match() { + if (I + TempID.Bits.size() - 1 >= IDToMatch.size()) + return false; + + for (unsigned K = 0; K < TempID.size(); ++K) { + if (TempID.Bits[K] != IDToMatch.Bits[I + K]) + return false; + } + + I += TempID.size(); + return true; + } + + template inline bool Match(const T &X) { + TempID.clear(); + X.Profile(TempID); + return Match(); + } + + bool MatchPointer(const void *Ptr) { + TempID.clear(); + TempID.AddPointer(Ptr); + return Match(); + } + + bool MatchInteger(signed I) { + TempID.clear(); + TempID.AddInteger(I); + return Match(); + } + + bool MatchInteger(unsigned I) { + TempID.clear(); + TempID.AddInteger(I); + return Match(); + } + + bool MatchInteger(long I) { + TempID.clear(); + TempID.AddInteger(I); + return Match(); + } + + bool MatchInteger(unsigned long I) { + TempID.clear(); + TempID.AddInteger(I); + return Match(); + } + + bool MatchInteger(long long I) { + TempID.clear(); + TempID.AddInteger(I); + return Match(); + } + + bool MatchInteger(unsigned long long I) { + TempID.clear(); + TempID.AddInteger(I); + return Match(); + } + + bool MatchBoolean(bool B) { + TempID.clear(); + TempID.AddBoolean(B); + return Match(); + } }; // Convenience type to hide the implementation of the folding set. @@ -622,6 +721,215 @@ Ctx getContext() const { return Context; } }; +//===----------------------------------------------------------------------===// +/// NonintrusiveFoldingSetBase - Type-erased implementation for +/// NonintrusiveFoldingSet and ContextualNonintrusiveFoldingSet. Like +/// FoldingSet, it is useful for uniquing expensive-to-create objects, but it +/// implements that behavior differently. It is faster while being memory +/// neutral; this implementation was substituted for some very large FoldingSets +/// in Clang and memory usage regressed by less than 1%. +/// +/// Differences: +/// +/// * Rebucketing does not reprofile or rehash. CAUTION: This behavior more +/// quickly exposes buggy profiling when a node no longer hashes to its +/// current bucket, whereas the reprofiling and rehashing done by FoldingSet +/// masks these errors. +/// * Rebucketing is done "in-place" and moves on average half of the nodes +/// instead of all of them. +/// * When searching for a node, the hash is compared first, which can avoid +/// comparing a lengthy profile. +/// * If a profile comparision is needed, the FoldingSetNodeIDMatcher class +/// supports short-circuiting the comparison at the first non-matching word. +/// * The InsertPos obtained by a FindNodeOrInsertPos call is the hash, +/// instead of a bucket pointer, so it remains valid even if another +/// insertion is made first. In some use-cases, this avoids needing to redo +/// the search to update the InsertPos. +/// * It does not require node types to inherit from FoldingSetNode. +/// * In addition to Profile(), the node type must implement MatchProfile(), +/// which determines if the node's profile matches an existing profile. +/// MatchProfile() must agree with Profile() and should return as early as +/// possible for a non-match. +class NonintrusiveFoldingSetBase { + // Node for a singly-linked list. + struct Node { + void *Ptr; // A pointer that was inserted into the set. + unsigned Hash; // Hash of Ptr->Profile(). + unsigned Next; // Index of next node in Nodes vector. + + Node() : Ptr{nullptr}, Hash{0u}, Next{0u} {} + + Node(void *Ptr, unsigned Hash, unsigned Next) + : Ptr{Ptr}, Hash{Hash}, Next{Next} {} + }; + + // All nodes are stored in this vector and use vector indices as links. + // Index 0 is left unused by convention; this wastes memory for one node, but + // simplifies other logic by letting us use 0 as a null link. + SmallVector Nodes; + + // Each bucket is represented as the Nodes index of the first Node in the + // bucket. + SmallVector Buckets; + + // A "bucket" of nodes that have been removed. The set recycles these before + // growing the Nodes vector. + unsigned FreeList; + + // The number of active nodes in the Nodes vector. + unsigned NumNodes; + +protected: + explicit NonintrusiveFoldingSetBase(unsigned Log2NumBuckets) + : Nodes(1), Buckets(1u << Log2NumBuckets), FreeList{0u}, NumNodes{0u} {} + + static unsigned ToBucket(unsigned Hash, unsigned N) { return Hash & (N - 1); } + + unsigned getBucket(unsigned B) const { return Buckets[B]; } + + Node *getNode(unsigned I) { return &Nodes[I]; } + + unsigned getNumBuckets() const { return Buckets.size(); } + + /// Grow - Increase the number of buckets and redistribute existing + /// nodes among the new buckets. + void Grow(unsigned NewNumBuckets); + + /// InsertNode - Inserts pointer N into the set. N is assumed to not + /// be in the set and InsertPos is its hash. + void InsertNode(void *N, unsigned InsertPos); + + /// RemoveNode - Removes pointer N from the set if it is found and + /// returns true. Returns false if it cannot be found. RemovePos + /// is its hash. + bool RemoveNode(void *N, unsigned RemovePos); + +public: + /// capacity - Returns the number of items that can be stored + /// in the set before rebucketing occurs. + unsigned capacity() const { return getNumBuckets() * 2; } + + /// clear - Remove all items from the set. + void clear(); + + /// empty - Test if the set is empty. + bool empty() const { return NumNodes == 0; } + + /// size - Returns the number of items currently in the set. + unsigned size() const { return NumNodes; } +}; + +//===----------------------------------------------------------------------===// +/// NonintrusiveFoldingSetImpl - Puts a type-safe interface on the +/// type-erased NonintrusiveFoldingSetBase to provide code common to +/// NonintrusiveFoldingSet and ContextualNonintrusiveFoldingSetImpl. +template +class NonintrusiveFoldingSetImpl : public NonintrusiveFoldingSetBase { + using Super = NonintrusiveFoldingSetBase; + +protected: + explicit NonintrusiveFoldingSetImpl(unsigned Log2NumBuckets) + : NonintrusiveFoldingSetBase(Log2NumBuckets) {} + +public: + /// FindNodeOrInsertPos - Determines if a node matching ID is in the set. + /// If it is, returns it. If it is not, sets InsertPos to value that is needed + /// for InsertNode insert it. + T *FindNodeOrInsertPos(const FoldingSetNodeID &ID, unsigned &InsertPos) { + const unsigned Hash = ID.ComputeHash(); + const unsigned B = ToBucket(Hash, getNumBuckets()); + + unsigned NodeIndex = getBucket(B); + while (NodeIndex) { + Node *Node = getNode(NodeIndex); + if (Node->Hash == Hash) { + auto *N = static_cast(Node->Ptr); + if (static_cast(this)->Match(N, ID)) + return N; + } + NodeIndex = Node->Next; + } + + InsertPos = Hash; + return nullptr; + } + + /// GetOrInsertNode - If there is an existing simple Node exactly + /// equal to the specified node, return it. Otherwise, insert 'N' and + /// return it instead. + T *GetOrInsertNode(T *N) { + unsigned InsertPos = 0; + FoldingSetNodeID ID; + N->Profile(ID); + if (T *R = FindNodeOrInsertPos(ID, InsertPos)) + return R; + + InsertNode(N, InsertPos); + return N; + } + + /// InsertNode - Insert the specified node into the folding set, knowing that + /// it is not already in the folding set. + void InsertNode(T *N) { + T *Inserted = GetOrInsertNode(N); + (void)Inserted; + assert(Inserted == N && "Node already inserted!"); + } + + /// InsertNode - Inserts pointer N into the set. N is assumed to not + /// be in the set and InsertPos must have been obtained by calling + /// FindNodeOrInsertPos. + void InsertNode(T *N, unsigned InsertPos) { Super::InsertNode(N, InsertPos); } + + /// RemoveNode - Attempts to remove pointer N from the set. Returns true + /// if the pointer was found and removed, otherwise returns false. + bool RemoveNode(T *N) { + FoldingSetNodeID ID; + N->Profile(ID); + + return Super::RemoveNode(N, ID.ComputeHash()); + } +}; + +//===----------------------------------------------------------------------===// +/// NonintrusiveFoldingSet - +template +class NonintrusiveFoldingSet + : public NonintrusiveFoldingSetImpl> { + using Super = NonintrusiveFoldingSetImpl; + friend Super; // Lets Super call Match. + + /// Match - Returns the result of the node's MatchProfile function. + bool Match(T *N, const FoldingSetNodeID &ID) const { + return N->MatchProfile(ID); + } + +public: + explicit NonintrusiveFoldingSet(unsigned Log2NumBuckets = 6) + : Super(Log2NumBuckets) {} +}; + +template +class ContextualNonintrusiveFoldingSet + : public NonintrusiveFoldingSetImpl< + T, ContextualNonintrusiveFoldingSet> { + using Super = NonintrusiveFoldingSetImpl; + friend Super; // Lets Super call Match. + + Ctx Context; + + /// Match - Returns the result of the node's MatchProfile function. The + /// Context is passed to it. + bool Match(T *N, const FoldingSetNodeID &ID) const { + return N->MatchProfile(ID, Context); + } + +public: + explicit ContextualNonintrusiveFoldingSet(Ctx Context, + unsigned Log2NumBuckets = 6) + : Super(Log2NumBuckets), Context{Context} {} +}; + //===----------------------------------------------------------------------===// /// FoldingSetVector - This template class combines a FoldingSet and a vector /// to provide the interface of FoldingSet but with deterministic iteration diff --git a/llvm/lib/Support/FoldingSet.cpp b/llvm/lib/Support/FoldingSet.cpp --- a/llvm/lib/Support/FoldingSet.cpp +++ b/llvm/lib/Support/FoldingSet.cpp @@ -422,3 +422,98 @@ FoldingSetBucketIteratorImpl::FoldingSetBucketIteratorImpl(void **Bucket) { Ptr = (!*Bucket || !GetNextPtr(*Bucket)) ? (void*) Bucket : *Bucket; } + +void NonintrusiveFoldingSetBase::clear() { + std::fill(Buckets.begin(), Buckets.end(), 0u); + Nodes.resize(1); + NumNodes = 0; + FreeList = 0; +} + +void NonintrusiveFoldingSetBase::Grow(unsigned NewNumBuckets) { + const unsigned OldNumBuckets = getNumBuckets(); + assert(NewNumBuckets > OldNumBuckets); + Buckets.resize(NewNumBuckets); + + // Rebucket about half of the existing nodes. When doubling the number + // of buckets, we use one more bit from the hash. If this bit is 0, then + // the node stays in its current bucket. If this bit is 1, then the node + // moves to a bucket in the upper half of Buckets. + for (unsigned i = 0; i < OldNumBuckets; ++i) { + unsigned NodeIndex = Buckets[i]; + unsigned PrevIndex = 0; + + while (NodeIndex) { + Node *N = &Nodes[NodeIndex]; + const unsigned NextIndex = N->Next; + + unsigned B = ToBucket(N->Hash, NewNumBuckets); + if (B < OldNumBuckets) { + // Stays in this bucket. + PrevIndex = NodeIndex; + } else { + // Moves to a new bucket. Detach it first. + if (PrevIndex) + Nodes[PrevIndex].Next = NextIndex; + else + Buckets[i] = NextIndex; + + // Attach to new bucket. + N->Next = Buckets[B]; + Buckets[B] = NodeIndex; + } + + NodeIndex = NextIndex; + } + } +} + +void NonintrusiveFoldingSetBase::InsertNode(void *N, unsigned InsertPos) { + if (size() + 1 > capacity()) { + Grow(getNumBuckets() * 2); + } + + unsigned B = ToBucket(InsertPos, getNumBuckets()); + + if (FreeList) { + // Recycle a removed node. + unsigned Next = Nodes[FreeList].Next; + Nodes[FreeList] = Node(N, InsertPos, Buckets[B]); + FreeList = Next; + } else { + // Make a new node that links to the first node of bucket B. + Nodes.emplace_back(N, InsertPos, Buckets[B]); + // Let that new node become the first node of bucket B. + Buckets[B] = Nodes.size() - 1; + } + + ++NumNodes; +} + +bool NonintrusiveFoldingSetBase::RemoveNode(void *N, unsigned RemovePos) { + unsigned B = ToBucket(RemovePos, getNumBuckets()); + unsigned NodeIndex = Buckets[B]; + + unsigned PrevIndex = 0; + while (NodeIndex) { + const unsigned NextIndex = Nodes[NodeIndex].Next; + + if (Nodes[NodeIndex].Ptr == N) { + if (PrevIndex) + Nodes[PrevIndex].Next = NextIndex; + else + Buckets[B] = NextIndex; + + Nodes[NodeIndex].Next = FreeList; + FreeList = NodeIndex; + + --NumNodes; + return true; + } + + PrevIndex = NodeIndex; + NodeIndex = NextIndex; + } + + return false; +} diff --git a/llvm/unittests/ADT/FoldingSet.cpp b/llvm/unittests/ADT/FoldingSet.cpp --- a/llvm/unittests/ADT/FoldingSet.cpp +++ b/llvm/unittests/ADT/FoldingSet.cpp @@ -188,5 +188,110 @@ EXPECT_EQ(Trivial.capacity(), OldCapacity); } +struct Pair { + unsigned Key = 0; + unsigned Value = 0; + Pair(unsigned K, unsigned V) : Key(K), Value(V) {} + + void Profile(FoldingSetNodeID &ID) const { + ID.AddInteger(Key); + ID.AddInteger(Value); + } + + bool MatchProfile(const FoldingSetNodeID &ID) const { + FoldingSetNodeIDMatcher M(ID); + return M.MatchInteger(Key) && M.MatchInteger(Value) && M.Completed(); + } +}; + +TEST(NonintrusiveFoldingSetTest, IDComparison) { + NonintrusiveFoldingSet Trivial; + + Pair T(99, 42); + Trivial.InsertNode(&T); + + unsigned InsertPos = 0; // arbitrary + FoldingSetNodeID ID; + T.Profile(ID); + Pair *N = Trivial.FindNodeOrInsertPos(ID, InsertPos); + EXPECT_EQ(&T, N); + EXPECT_EQ(0, InsertPos); // unmodified +} + +TEST(NonintrusiveFoldingSetTest, MissedIDComparison) { + NonintrusiveFoldingSet Trivial; + + Pair S(100, 42); + Pair T(99, 42); + Trivial.InsertNode(&T); + + unsigned InsertPos = 0; + FoldingSetNodeID ID; + S.Profile(ID); + Pair *N = Trivial.FindNodeOrInsertPos(ID, InsertPos); + EXPECT_EQ(nullptr, N); + // InsertPos is a hash and may be any value. +} + +TEST(NonintrusiveFoldingSetTest, RemoveNodeThatIsPresent) { + NonintrusiveFoldingSet Trivial; + + Pair T(99, 42); + Trivial.InsertNode(&T); + EXPECT_EQ(Trivial.size(), 1U); + + bool WasThere = Trivial.RemoveNode(&T); + EXPECT_TRUE(WasThere); + EXPECT_EQ(0U, Trivial.size()); +} + +TEST(NonintrusiveFoldingSetTest, RemoveNodeThatIsAbsent) { + NonintrusiveFoldingSet Trivial; + + Pair T(99, 42); + bool WasThere = Trivial.RemoveNode(&T); + EXPECT_FALSE(WasThere); + EXPECT_EQ(0U, Trivial.size()); +} + +TEST(NonintrusiveFoldingSetTest, InsertAtPos) { + NonintrusiveFoldingSet Trivial; + + unsigned InsertPos = 0; + Pair Finder(99, 42); + FoldingSetNodeID ID; + Finder.Profile(ID); + Trivial.FindNodeOrInsertPos(ID, InsertPos); + + Pair T(99, 42); + Trivial.InsertNode(&T, InsertPos); + EXPECT_EQ(1U, Trivial.size()); +} + +TEST(NonintrusiveFoldingSetTest, EmptyIsTrue) { + NonintrusiveFoldingSet Trivial; + EXPECT_TRUE(Trivial.empty()); +} + +TEST(NonintrusiveFoldingSetTest, EmptyIsFalse) { + NonintrusiveFoldingSet Trivial; + Pair T(99, 42); + Trivial.InsertNode(&T); + EXPECT_FALSE(Trivial.empty()); +} + +TEST(NonintrusiveFoldingSetTest, ClearOnEmpty) { + NonintrusiveFoldingSet Trivial; + Trivial.clear(); + EXPECT_TRUE(Trivial.empty()); +} + +TEST(NonintrusiveFoldingSetTest, ClearOnNonEmpty) { + NonintrusiveFoldingSet Trivial; + Pair T(99, 42); + Trivial.InsertNode(&T); + Trivial.clear(); + EXPECT_TRUE(Trivial.empty()); } +} // namespace