Index: include/llvm/ADT/DisjointSetUnion.h =================================================================== --- include/llvm/ADT/DisjointSetUnion.h +++ include/llvm/ADT/DisjointSetUnion.h @@ -0,0 +1,110 @@ +//===- DisjointSetUnion.h - A structure for merging sets -------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_DISJOINTSETUNION_H +#define LLVM_ADT_DISJOINTSETUNION_H + +#include "DenseMap.h" + +namespace llvm { + +// This is a data structure that allows to respond the following queries: +// +// 1) For given two elements X and Y, merge the set containing X and the set +// containing Y into their union. +// 2) Check if given two elements X and Y belong to one set. +// +// Before any statement of type 1) is made, every elements belongs to its own +// set (containing only this sole element). Thus, all universe of entities of +// type T is a union of disjoint sets that can be merged. +// +// We represent our sets as trees with exactly one vertex that is reachable from +// all other vertices of this set. This vertex is called head, and every edge +// goes from some vertex V to Parent[V]. Every vertex is either a head of its +// set, or it has an immediate parent vertex. +// +// To reach good complexity of queries, we use two heuristics. +// First, we compress paths in trees (attaching all traversed nodes immediately +// to the head) on each query of type 1). +// Second, when merging trees during query 2), we attach the head of a smaller +// tree to the bigger tree. +// These two heuristics combined allow us to reach complexity which is close to +// O(1) for both queries. +template +class DisjointSetUnion { +public: + // Check whether or not X and Y belong to one set after merges made so far. + bool isInSameSet(T X, T Y) { + return head(X) == head(Y); + } + + // Merge the set that contains X and the set that contains Y into a new set M, + // which is a union of these sets. + void mergeSetsOf(T X, T Y) { + T H1 = head(X); + T H2 = head(Y); + if (H1 != H2) { + // If there were two different sets, merge them. Use ranking heuristic: + // attach a smaller tree to a bigger one. + int R1 = rank(H1); + int R2 = rank(H2); + if (R1 > R2) + std::swap(H1, H2); + // Attach head of the smaller set immediately to the head of bigger one. + Parent[H1] = H2; + // H1 is no longer a head and will never become one. We don't have to + // store a map instance for it. + Rank.erase(H1); + // Now the set of H2 contains all elements of old sets of H1 and H2. + Rank[H2] = R1 + R2; + } + } + + // Clear the information about merges and state that now this DSU is returned + // to its initial state, meaning that every vertex belongs to its own set + // containing this sole element. + void clear() { + Parent.clear(); + Rank.clear(); + } + +private: + // For a given vertex X, return the head of its set. + T head(T X) { + auto It = Parent.find(X); + if (It != Parent.end()) + // Head is Parent(Parent(...(Parent(X))...). After we find it, we can + // compress this path making Head the immediate parent of X. We do so + // recursively for all chain of parents. + return Parent[X] = head(It->second); + // Every vertex that doesn't have a parent is the head of its set. + return X; + } + + // Calculates rank of a head vertex X. Rank of a head is the number of + // elements in its set. + int rank(T X) { + assert(head(X) == X && "Attempt to calculate rank for non-head vertex?"); + auto It = Rank.find(X); + if (It != Rank.end()) { + assert(It->second > 1 && "Storing rank of one-element set in the map?"); + return It->second; + } + // This set consists of sole element X. + return 1; + } + + // Stores immediate parents of vertices. + DenseMap Parent; + // Memorizes calculated ranks of head vertices. + DenseMap Rank; +}; + +} // end namespace llvm + +#endif // LLVM_ADT_DISJOINTSETUNION_H Index: unittests/ADT/CMakeLists.txt =================================================================== --- unittests/ADT/CMakeLists.txt +++ unittests/ADT/CMakeLists.txt @@ -16,6 +16,7 @@ DenseMapTest.cpp DenseSetTest.cpp DepthFirstIteratorTest.cpp + DisjointSetUnionTest.cpp FoldingSet.cpp FunctionRefTest.cpp HashingTest.cpp Index: unittests/ADT/DisjointSetUnionTest.cpp =================================================================== --- unittests/ADT/DisjointSetUnionTest.cpp +++ unittests/ADT/DisjointSetUnionTest.cpp @@ -0,0 +1,99 @@ +//=== llvm/unittest/ADT/DisjointSetUnionTest.cpp - DSU structure tests ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DisjointSetUnion.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace llvm { + +TEST(DisjointSetUnionTest, NoMerges) { + DisjointSetUnion DSU; + // Until we merged any sets, check that every element belongs to its own set + // that contains this sole element. + for (int i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) + if (i == j) + EXPECT_TRUE(DSU.isInSameSet(i, j)); + else + EXPECT_FALSE(DSU.isInSameSet(i, j)); +} + +TEST(DisjointSetUnionTest, SimpleMerge1) { + DisjointSetUnion DSU; + // Check that once we merge (A, B), (B, C), (C, D), then all elements belong + // to one set. + DSU.mergeSetsOf(0, 1); + DSU.mergeSetsOf(1, 2); + DSU.mergeSetsOf(2, 3); + for (int i = 0; i < 4; ++i) + for (int j = 0; j < 4; ++j) + EXPECT_TRUE(DSU.isInSameSet(i, j)); +} + +TEST(DisjointSetUnionTest, SimpleMerge2) { + DisjointSetUnion DSU; + // Check that once we merge (A, B), (C, D), (A, C), then all elements belong + // to one set. + DSU.mergeSetsOf(0, 1); + DSU.mergeSetsOf(2, 3); + DSU.mergeSetsOf(0, 2); + for (int i = 0; i < 4; ++i) + for (int j = 0; j < 4; ++j) + EXPECT_TRUE(DSU.isInSameSet(i, j)); +} + +TEST(DisjointSetUnionTest, Clear) { + DisjointSetUnion DSU; + // Check that reset works. + DSU.mergeSetsOf(0, 1); + DSU.mergeSetsOf(1, 2); + DSU.clear(); + for (int i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) + if (i == j) + EXPECT_TRUE(DSU.isInSameSet(i, j)); + else + EXPECT_FALSE(DSU.isInSameSet(i, j)); +} + +TEST(DisjointSetUnionTest, TwoSets) { + DisjointSetUnion DSU; + // Form sets of odd and even numbers, check that we split them into these + // two sets correcrly. + for (int i = 0; i < 30; i += 2) + DSU.mergeSetsOf(0, i); + for (int i = 1; i < 30; i += 2) + DSU.mergeSetsOf(1, i); + + for (int i = 0; i < 30; i++) + for (int j = 0; j < 30; j++) + if (i % 2 == j % 2) + EXPECT_TRUE(DSU.isInSameSet(i, j)); + else + EXPECT_FALSE(DSU.isInSameSet(i, j)); +} + +TEST(DisjointSetUnionTest, MultipleSets) { + DisjointSetUnion DSU; + // Split numbers from [0, 100) into sets so that values in the same set have + // equal remainders (mod 17). + for (int i = 0; i < 100; i++) + DSU.mergeSetsOf(i % 17, i); + + for (int i = 0; i < 100; i++) + for (int j = 0; j < 100; j++) + if (i % 17 == j % 17) + EXPECT_TRUE(DSU.isInSameSet(i, j)); + else + EXPECT_FALSE(DSU.isInSameSet(i, j)); +} + +} // llvm