Index: lib/esan/esan_hashtable.h =================================================================== --- /dev/null +++ lib/esan/esan_hashtable.h @@ -0,0 +1,252 @@ +//===-- esan_hashtable.h ----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of EfficiencySanitizer, a family of performance tuners. +// +// Generic resizing hashtable. +//===----------------------------------------------------------------------===// + +#include "sanitizer_common/sanitizer_allocator_internal.h" +#include "sanitizer_common/sanitizer_internal_defs.h" +#include "sanitizer_common/sanitizer_mutex.h" +#include + +namespace __esan { + +//===----------------------------------------------------------------------===// +// Default hash and comparison functions +//===----------------------------------------------------------------------===// + +template struct DefaultHash { + size_t operator()(const T &Key) const { + return (size_t)Key; + } +}; + +template struct DefaultEqual { + bool operator()(const T &Key1, const T &Key2) const { + return Key1 == Key2; + } +}; + +//===----------------------------------------------------------------------===// +// HashTable declaration +//===----------------------------------------------------------------------===// + +// A simple resizing and mutex-locked hashtable. +// +// If the default hash functor is used, KeyTy must have an operator size_t(). +// If the default comparison functor is used, KeyTy must have an operator ==. +// +// By default all operations are internally-synchronized with a mutex, with no +// synchronization for payloads once hashtable functions return. If +// ExternalLock is set to true, the caller should call the lock() and unlock() +// routines around all hashtable operations and subsequent manipulation of +// payloads. +template , + typename EqualFuncTy = DefaultEqual > class HashTable { + public: + // InitialCapacity must be a power of 2. + // ResizeFactor must be between 1 and 99 and indicates the + // maximum percentage full that the table should ever be. + HashTable(u32 InitialCapacity = 2048, u32 ResizeFactor = 70); + ~HashTable(); + bool lookup(const KeyTy &Key, DataTy &Payload); // Const except for Mutex. + bool add(const KeyTy &Key, const DataTy &Payload); + bool remove(const KeyTy &Key); + u32 size(); // Const except for Mutex. + // If the table is internally-synchronized, this lock must not be held + // while a hashtable function is called as it will deadlock: the lock + // is not recursive. This is meant for use with externally-synchronized + // tables. + void lock(); + void unlock(); + + private: + void resize(); + + struct HashEntry { + KeyTy Key; + DataTy Payload; + HashEntry *Next; + }; + + HashEntry **Table; + u32 Capacity; + u32 Entries; + const u32 ResizeFactor; + BlockingMutex Mutex; + const HashFuncTy HashFunc; + const EqualFuncTy EqualFunc; +}; + +//===----------------------------------------------------------------------===// +// Hashtable implementation +//===----------------------------------------------------------------------===// + +template +HashTable:: +HashTable(u32 InitialCapacity, u32 ResizeFactor) : + Capacity(InitialCapacity), Entries(0), ResizeFactor(ResizeFactor), + HashFunc(HashFuncTy()), EqualFunc(EqualFuncTy()) { + CHECK(IsPowerOfTwo(Capacity)); + CHECK(ResizeFactor >= 1 && ResizeFactor <= 99); + Table = (HashEntry**)InternalAlloc(Capacity*sizeof(HashEntry*)); + internal_memset(Table, 0, Capacity*sizeof(HashEntry*)); +} + +template +HashTable::~HashTable() { + for (u32 i = 0; i < Capacity; ++i) { + HashEntry *Entry = Table[i]; + while (Entry != nullptr) { + HashEntry *Next = Entry->Next; + Entry->Payload.~DataTy(); + InternalFree(Entry); + Entry = Next; + } + } + InternalFree(Table); +} + +template +u32 HashTable::size() { + u32 Res; + if (!ExternalLock) + Mutex.Lock(); + Res = Entries; + if (!ExternalLock) + Mutex.Unlock(); + return Res; +} + +template +bool HashTable:: +lookup(const KeyTy &Key, DataTy &Payload) { + if (!ExternalLock) + Mutex.Lock(); + bool Found = false; + size_t Hash = HashFunc(Key) % Capacity; + HashEntry *Entry = Table[Hash]; + for (; Entry != nullptr; Entry = Entry->Next) { + if (EqualFunc(Entry->Key, Key)) { + Payload = Entry->Payload; + Found = true; + break; + } + } + if (!ExternalLock) + Mutex.Unlock(); + return Found; +} + +template +void HashTable:: +resize() { + if (!ExternalLock) + Mutex.CheckLocked(); + size_t OldCapacity = Capacity; + HashEntry **OldTable = Table; + Capacity *= 2; + Table = (HashEntry**)InternalAlloc(Capacity*sizeof(HashEntry*)); + internal_memset(Table, 0, Capacity*sizeof(HashEntry*)); + // Re-hash + for (u32 i = 0; i < OldCapacity; ++i) { + HashEntry *OldEntry = OldTable[i]; + while (OldEntry != nullptr) { + HashEntry *Next = OldEntry->Next; + size_t Hash = HashFunc(OldEntry->Key) % Capacity; + OldEntry->Next = Table[Hash]; + Table[Hash] = OldEntry; + OldEntry = Next; + } + } +} + +template +bool HashTable:: +add(const KeyTy &Key, const DataTy &Payload) { + if (!ExternalLock) + Mutex.Lock(); + bool Exists = false; + size_t Hash = HashFunc(Key) % Capacity; + HashEntry *Entry = Table[Hash]; + for (; Entry != nullptr; Entry = Entry->Next) { + if (EqualFunc(Entry->Key, Key)) { + Exists = true; + break; + } + } + if (!Exists) { + Entries++; + if (Entries*100 >= Capacity*ResizeFactor) { + resize(); + Hash = HashFunc(Key) % Capacity; + } + HashEntry *Add = (HashEntry*)InternalAlloc(sizeof(*Add)); + Add->Key = Key; + Add->Payload = Payload; + Add->Next = Table[Hash]; + Table[Hash] = Add; + } + if (!ExternalLock) + Mutex.Unlock(); + return !Exists; +} + +template +bool HashTable:: +remove(const KeyTy &Key) { + if (!ExternalLock) + Mutex.Lock(); + bool Found = false; + size_t Hash = HashFunc(Key) % Capacity; + HashEntry *Entry = Table[Hash]; + HashEntry *Prev = nullptr; + for (; Entry != nullptr; Prev = Entry, Entry = Entry->Next) { + if (EqualFunc(Entry->Key, Key)) { + Found = true; + Entries--; + if (Prev == nullptr) + Table[Hash] = Entry->Next; + else + Prev->Next = Entry->Next; + Entry->Payload.~DataTy(); + InternalFree(Entry); + break; + } + } + if (!ExternalLock) + Mutex.Unlock(); + return Found; +} + +template +void HashTable:: +lock() { + Mutex.Lock(); +} + +template +void HashTable:: +unlock() { + Mutex.Unlock(); +} + +} // namespace __esan Index: test/esan/Unit/hashtable.cpp =================================================================== --- /dev/null +++ test/esan/Unit/hashtable.cpp @@ -0,0 +1,134 @@ +// RUN: %clangxx_unit -esan-instrument-loads-and-stores=0 -O0 %s -o %t 2>&1 +// RUN: %env_esan_opts="record_snapshots=0" %run %t 2>&1 | FileCheck %s + +#include "esan/esan_hashtable.h" +#include +#include +#include +#include + +class MyData { + public: + MyData(const char *Str) : RefCount(0) { Buf = strdup(Str); } + ~MyData() { + fprintf(stderr, " Destructor: %s.\n", Buf); + free(Buf); + } + bool operator==(MyData &Cmp) { return strcmp(Buf, Cmp.Buf) == 0; } + operator size_t() const { + size_t Res = 0; + for (int i = 0; i < strlen(Buf); ++i) + Res ^= Buf[i]; + return Res; + } + char *Buf; + int RefCount; +}; + +// We use a smart pointer wrapper to free the payload on hashtable removal. +struct MyDataPayload { + MyDataPayload() : Data(nullptr) {} + explicit MyDataPayload(MyData *Data) : Data(Data) { ++Data->RefCount; } + ~MyDataPayload() { + if (Data && --Data->RefCount == 0) { + fprintf(stderr, "Deleting %s.\n", Data->Buf); + delete Data; + } + } + MyDataPayload(const MyDataPayload &Copy) { + Data = Copy.Data; + ++Data->RefCount; + } + MyDataPayload & operator=(const MyDataPayload &Copy) { + if (this != &Copy) { + this->~MyDataPayload(); + Data = Copy.Data; + ++Data->RefCount; + } + return *this; + } + bool operator==(MyDataPayload &Cmp) { return *Data == *Cmp.Data; } + operator size_t() const { return (size_t)*Data; } + MyData *Data; +}; + +int main() +{ + __esan::HashTable IntTable; + assert(IntTable.size() == 0); + bool Added = IntTable.add(4, 42); + assert(Added); + assert(!IntTable.add(4, 42)); + assert(IntTable.size() == 1); + int Value; + bool Found = IntTable.lookup(4, Value); + assert(Found && Value == 42); + assert(!IntTable.remove(5)); + assert(IntTable.remove(4)); + + __esan::HashTable DataTable(4); + MyDataPayload NewData(new MyData("mystring")); + Added = DataTable.add(4, NewData); + assert(Added); + MyDataPayload FoundData; + Found = DataTable.lookup(4, FoundData); + assert(Found && strcmp(FoundData.Data->Buf, "mystring") == 0); + assert(!DataTable.remove(5)); + assert(DataTable.remove(4)); + // Test resize. + for (int i = 0; i < 4; ++i) { + MyDataPayload MoreData(new MyData("delete-at-end")); + Added = DataTable.add(i+1, MoreData); + assert(Added); + assert(!DataTable.add(i+1, MoreData)); + } + for (int i = 0; i < 4; ++i) { + Found = DataTable.lookup(i+1, FoundData); + assert(Found && strcmp(FoundData.Data->Buf, "delete-at-end") == 0); + } + + // Test payload freeing via smart pointer wrapper. + __esan::HashTable DataKeyTable; + MyDataPayload DataA(new MyData("string AB")); + DataKeyTable.lock(); + Added = DataKeyTable.add(DataA, DataA); + assert(Added); + Found = DataKeyTable.lookup(DataA, FoundData); + assert(Found && strcmp(FoundData.Data->Buf, "string AB") == 0); + MyDataPayload DataB(new MyData("string AB")); + Added = DataKeyTable.add(DataB, DataB); + assert(!Added); + DataKeyTable.remove(DataB); // Should free the DataA payload. + DataKeyTable.unlock(); + + // Test custom functors. + struct CustomHash { + size_t operator()(int Key) const { return Key % 4; } + }; + struct CustomEqual { + bool operator()(int Key1, int Key2) const { return Key1 %4 == Key2 % 4; } + }; + __esan::HashTable ModTable; + Added = ModTable.add(2, 42); + assert(Added); + Added = ModTable.add(6, 42); + assert(!Added); + + fprintf(stderr, "All checks passed.\n"); + return 0; +} +// CHECK: Deleting mystring. +// CHECK-NEXT: Destructor: mystring. +// CHECK-NEXT: All checks passed. +// CHECK-NEXT: Deleting string AB. +// CHECK-NEXT: Destructor: string AB. +// CHECK-NEXT: Deleting string AB. +// CHECK-NEXT: Destructor: string AB. +// CHECK-NEXT: Deleting delete-at-end. +// CHECK-NEXT: Destructor: delete-at-end. +// CHECK-NEXT: Deleting delete-at-end. +// CHECK-NEXT: Destructor: delete-at-end. +// CHECK-NEXT: Deleting delete-at-end. +// CHECK-NEXT: Destructor: delete-at-end. +// CHECK-NEXT: Deleting delete-at-end. +// CHECK-NEXT: Destructor: delete-at-end.