diff --git a/lld/Common/CommonLinkerContext.cpp b/lld/Common/CommonLinkerContext.cpp
--- a/lld/Common/CommonLinkerContext.cpp
+++ b/lld/Common/CommonLinkerContext.cpp
@@ -34,6 +34,12 @@
   // new in SpecificAlloc::create().
   for (auto &it : instances)
     it.second->~SpecificAllocBase();
+
+  for (auto &it : perThreadContexts) {
+    for (auto &instance : it.second->perThreadInstances)
+      instance.second->~SpecificAllocBase();
+    delete it.second;
+  }
   lctx = nullptr;
 }
 
@@ -48,4 +54,9 @@
   if (lctx == nullptr)
     return;
   delete lctx;
+  lctx = nullptr;
+}
+
+llvm::StringSaver &lld::perThreadSaver() { // Calling thread's own saver.
+  return commonContext().perThreadContext()->perThreadSaver;
 }
diff --git a/lld/Common/Memory.cpp b/lld/Common/Memory.cpp
--- a/lld/Common/Memory.cpp
+++ b/lld/Common/Memory.cpp
@@ -8,10 +8,17 @@
 
 #include "lld/Common/Memory.h"
 #include "lld/Common/CommonLinkerContext.h"
+#include "llvm/Support/RWMutex.h"
 
 using namespace llvm;
 using namespace lld;
 
+namespace {
+// The value is never used. Each thread has its own copy of this variable, so
+// its address differs between threads and serves as a per-thread key.
+thread_local int threadTag;
+} // namespace
+
 SpecificAllocBase *
 lld::SpecificAllocBase::getOrCreate(void *tag, size_t size, size_t align,
                                     SpecificAllocBase *(&creator)(void *)) {
@@ -23,3 +30,38 @@
   }
   return instance;
 }
+
+// Returns the calling thread's PerThreadContext, creating it on first use.
+PerThreadContext *CommonLinkerContext::perThreadContext() {
+  // &threadTag differs per thread; the member contextMutex guards the map.
+  void *contextKey = &threadTag;
+  {
+    llvm::sys::ScopedReader lock(contextMutex);
+    auto entry = perThreadContexts.find(contextKey);
+    if (entry != perThreadContexts.end())
+      return entry->second;
+  }
+
+  // Context didn't exist yet, so create a new one for this thread. The key is
+  // thread-specific, so no other thread can have inserted it in the meantime.
+  // TODO: look in the Map's api for a way to avoid looking up twice
+  auto *created = new PerThreadContext;
+  {
+    llvm::sys::ScopedWriter lock(contextMutex);
+    perThreadContexts[contextKey] = created;
+  }
+  return created;
+}
+
+// Returns the calling thread's allocator registered under `tag`, building it
+// with `creator` inside that thread's bump allocator on first request.
+SpecificAllocBase *lld::SpecificAllocBase::getOrCreatePerThread(
+    void *tag, size_t size, size_t align,
+    SpecificAllocBase *(&creator)(void *)) {
+  PerThreadContext &perThread = *context().perThreadContext();
+  SpecificAllocBase *&slot = perThread.perThreadInstances[tag];
+  // Null means nothing was created for this tag on this thread yet.
+  if (!slot)
+    slot = creator(perThread.bAlloc.Allocate(size, align));
+  return slot;
+}
diff --git a/lld/include/lld/Common/CommonLinkerContext.h b/lld/include/lld/Common/CommonLinkerContext.h
--- a/lld/include/lld/Common/CommonLinkerContext.h
+++ b/lld/include/lld/Common/CommonLinkerContext.h
@@ -19,8 +19,13 @@
 #ifndef LLD_COMMON_COMMONLINKINGCONTEXT_H
 #define LLD_COMMON_COMMONLINKINGCONTEXT_H
 
+#include <mutex>
+#include <thread>
+#include <unordered_map>
+
 #include "lld/Common/ErrorHandler.h"
 #include "lld/Common/Memory.h"
+#include "llvm/Support/RWMutex.h"
 #include "llvm/Support/StringSaver.h"
 
 namespace llvm {
@@ -29,6 +34,15 @@
 
 namespace lld {
 struct SpecificAllocBase;
+
+// Per-thread allocation state. TODO: maybe just use the CommonLinkerContext
+// class instead, but I don't want unnecessary nesting of the mutexes.
+struct PerThreadContext {
+  llvm::BumpPtrAllocator bAlloc; // Must precede perThreadSaver, which uses it.
+  llvm::DenseMap<void *, SpecificAllocBase *> perThreadInstances;
+  llvm::StringSaver perThreadSaver{bAlloc};
+};
+
 class CommonLinkerContext {
 public:
   CommonLinkerContext();
@@ -43,13 +57,17 @@
   ErrorHandler e;
 
   // Per thread allocs and savers.
-  llvm::DenseMap<std::thread::id, SpecificAllocBase *> perThreadInstances;
-  llvm::DenseMap<std::thread::id, llvm : StringSaver> perThreadSavers;
+  PerThreadContext *perThreadContext();
+
+  llvm::DenseMap<void *, PerThreadContext *> perThreadContexts;
+  llvm::sys::RWMutex contextMutex;
 };
 
 // Retrieve the global state. Currently only one state can exist per process,
 // but in the future we plan on supporting an arbitrary number of LLD instances
 // in a single process.
+// TODO: Check how multiple instances would interact with the thread-local
+// variable whose address keys the per-thread contexts (hopefully not much).
 CommonLinkerContext &commonContext();
 
 template <typename T = CommonLinkerContext> T &context() {
@@ -58,7 +76,7 @@
 
 bool hasContext();
 
-llvm::StrinSaver &perThreadSaver();
+llvm::StringSaver &perThreadSaver();
 llvm::BumpPtrAllocator &perThreadbAlloc();
 
 inline llvm::StringSaver &saver() { return context().saver; }
diff --git a/lld/include/lld/Common/Memory.h b/lld/include/lld/Common/Memory.h
--- a/lld/include/lld/Common/Memory.h
+++ b/lld/include/lld/Common/Memory.h
@@ -22,8 +22,10 @@
 #define LLD_COMMON_MEMORY_H
 
 #include "llvm/Support/Allocator.h"
+#include "llvm/Support/RWMutex.h"
 
 namespace lld {
+
 // A base class only used by the CommonLinkerContext to keep track of the
 // SpecificAlloc<> instances.
 struct SpecificAllocBase {
@@ -61,7 +63,7 @@
 
 template <typename T>
 inline llvm::SpecificBumpPtrAllocator<T> &getSpecificAllocSingletonPerThread() {
-  SpecificAllocBase *instance = SpecificAllocBase::getOrCreate(
+  SpecificAllocBase *instance = SpecificAllocBase::getOrCreatePerThread(
       &SpecificAlloc<T>::tag, sizeof(SpecificAlloc<T>),
       alignof(SpecificAlloc<T>), SpecificAlloc<T>::create);
   return ((SpecificAlloc<T> *)instance)->alloc;
@@ -76,7 +78,7 @@
 // Creates new instances of T off a (almost) contiguous arena/object pool. The
 // instances are destroyed whenever lldMain() goes out of scope.
 template <typename T, typename... U> T *makeThreadSafe(U &&...args) {
-  return new (getSpecificAllocSingleton<T>().Allocate())
+  return new (getSpecificAllocSingletonPerThread<T>().Allocate())
       T(std::forward<U>(args)...);
 }
 } // namespace lld