diff --git a/lld/COFF/DebugTypes.h b/lld/COFF/DebugTypes.h
--- a/lld/COFF/DebugTypes.h
+++ b/lld/COFF/DebugTypes.h
@@ -10,32 +10,37 @@
 #define LLD_COFF_DEBUGTYPES_H
 
 #include "lld/Common/LLVM.h"
-#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/MemoryBuffer.h"
 
 namespace llvm {
 namespace codeview {
-class PrecompRecord;
-class TypeServer2Record;
+struct GloballyHashedType;
 } // namespace codeview
 namespace pdb {
 class NativeSession;
+class TpiStream;
 }
 } // namespace llvm
 
 namespace lld {
 namespace coff {
 
+using llvm::codeview::GloballyHashedType;
 using llvm::codeview::TypeIndex;
 
 class ObjFile;
 class PDBInputFile;
 class TypeMerger;
+struct GHashState;
 
 class TpiSource {
 public:
-  enum TpiKind { Regular, PCH, UsingPCH, PDB, PDBIpi, UsingPDB };
+  enum TpiKind : uint8_t { Regular, PCH, UsingPCH, PDB, PDBIpi, UsingPDB };
 
   TpiSource(TpiKind k, ObjFile *f);
   virtual ~TpiSource();
@@ -53,21 +58,97 @@
   /// caller-provided ObjectIndexMap.
   virtual Error mergeDebugT(TypeMerger *m);
 
+  /// Load global hashes, either by hashing types directly, or by loading them
+  /// from LLVM's .debug$H section.
+  virtual void loadGHashes();
+
+  /// Use global hashes to merge type information.
+  virtual void remapTpiWithGHashes(GHashState *g);
+
+  // Remap a type index in place.
+  bool remapTypeIndex(TypeIndex &ti, llvm::codeview::TiRefKind refKind) const;
+
+protected:
+  void remapRecord(MutableArrayRef<uint8_t> rec,
+                   ArrayRef<llvm::codeview::TiReference> typeRefs);
+
+  void mergeTypeRecord(llvm::codeview::CVType ty);
+
+  // Merge the type records listed in uniqueTypes. beginIndex is the TypeIndex
+  // of the first record in this source, typically 0x1000. When PCHs are
+  // involved, it may start higher.
+  void mergeUniqueTypeRecords(
+      ArrayRef<uint8_t> debugTypes,
+      TypeIndex beginIndex = TypeIndex(TypeIndex::FirstNonSimpleIndex));
+
+  // Use the ghash table to construct a map from source type index to
+  // destination PDB type index. Usable for either TPI or IPI.
+  void fillMapFromGHashes(GHashState *m,
+                          llvm::SmallVectorImpl<TypeIndex> &indexMap);
+
+  // Copies ghashes from a vector into an array. These are long lived, so it's
+  // worth the time to copy these into an appropriately sized array to reduce
+  // memory usage.
+  void assignGHashesFromVector(std::vector<GloballyHashedType> &&hashVec);
+
+  // Walk over file->debugTypes and fill in the isItemIndex bit vector.
+  void fillIsItemIndexFromDebugT();
+
+public:
+  bool remapTypesInSymbolRecord(MutableArrayRef<uint8_t> rec);
+
+  void remapTypesInTypeRecord(MutableArrayRef<uint8_t> rec);
+
   /// Is this a dependent file that needs to be processed first, before other
   /// OBJs?
   virtual bool isDependency() const { return false; }
 
-  static void forEachSource(llvm::function_ref<void(TpiSource *)> fn);
+  /// Returns true if this type record should be omitted from the PDB, even if
+  /// it is unique. This prevents a record from being added to the input ghash
+  /// table.
+  bool shouldOmitFromPdb(uint32_t ghashIdx) {
+    return ghashIdx == endPrecompGHashIdx;
+  }
+
+  /// All sources of type information in the program.
+  static std::vector<TpiSource *> instances;
+
+  /// Dependency type sources, such as type servers or PCH object files. These
+  /// must be processed before objects that rely on them. Set by
+  /// TpiSource::sortDependencies.
+  static ArrayRef<TpiSource *> dependencySources;
+
+  /// Object file sources. These must be processed after dependencySources.
+  static ArrayRef<TpiSource *> objectSources;
+
+  /// Sorts the dependencies and reassigns TpiSource indices.
+  static void sortDependencies();
 
   static uint32_t countTypeServerPDBs();
   static uint32_t countPrecompObjs();
 
+  /// Free heap allocated ghashes.
+  static void clearGHashes();
+
   /// Clear global data structures for TpiSources.
   static void clear();
 
   const TpiKind kind;
+  bool ownedGHashes = true;
+  uint32_t tpiSrcIdx = 0;
+
+protected:
+  /// The ghash index (zero based, not 0x1000-based) of the LF_ENDPRECOMP record
+  /// in this object, if one exists. This is the all ones value otherwise. It is
+  /// recorded here so that it can be omitted from the final ghash table.
+  uint32_t endPrecompGHashIdx = ~0U;
+
+public:
   ObjFile *file;
 
+  /// An error encountered during type merging, if any.
+  Error typeMergingError = Error::success();
+
   // Storage for tpiMap or ipiMap, depending on the kind of source.
   llvm::SmallVector<TypeIndex, 0> indexMapStorage;
 
@@ -76,6 +157,31 @@
   // objects.
   llvm::ArrayRef<TypeIndex> tpiMap;
   llvm::ArrayRef<TypeIndex> ipiMap;
+
+  /// Array of global type hashes, indexed by TypeIndex. May be calculated on
+  /// demand, or present in input object files.
+  llvm::ArrayRef<llvm::codeview::GloballyHashedType> ghashes;
+
+  /// When ghashing is used, record the mapping from LF_[M]FUNC_ID to function
+  /// type index here. Both indices are PDB indices, not object type indexes.
+  llvm::DenseMap<TypeIndex, TypeIndex> funcIdToType;
+
+  /// Indicates if a type record is an item index or a type index.
+  llvm::BitVector isItemIndex;
+
+  /// A list of all "unique" type indices which must be merged into the final
+  /// PDB. GHash type deduplication produces this list, and it should be
+  /// considerably smaller than the input.
+  std::vector<uint32_t> uniqueTypes;
+
+  struct MergedInfo {
+    std::vector<uint8_t> recs;
+    std::vector<uint16_t> recSizes;
+    std::vector<uint32_t> recHashes;
+  };
+
+  MergedInfo mergedTpi;
+  MergedInfo mergedIpi;
 };
 
 TpiSource *makeTpiSource(ObjFile *file);
diff --git a/lld/COFF/DebugTypes.cpp b/lld/COFF/DebugTypes.cpp
--- a/lld/COFF/DebugTypes.cpp
+++ b/lld/COFF/DebugTypes.cpp
@@ -10,9 +10,12 @@
 #include "Chunks.h"
 #include "Driver.h"
 #include "InputFiles.h"
+#include "PDB.h"
 #include "TypeMerger.h"
 #include "lld/Common/ErrorHandler.h"
 #include "lld/Common/Memory.h"
+#include "lld/Common/Timer.h"
+#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
 #include "llvm/DebugInfo/CodeView/TypeRecord.h"
 #include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
 #include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
@@ -20,7 +23,10 @@
 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
 #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/TpiHashing.h"
 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/Parallel.h"
 #include "llvm/Support/Path.h"
 
 using namespace llvm;
@@ -54,6 +60,10 @@
   }
 
   Error mergeDebugT(TypeMerger *m) override;
+
+  void loadGHashes() override;
+  void remapTpiWithGHashes(GHashState *g) override;
+
   bool isDependency() const override { return true; }
 
   PDBInputFile *pdbInputFile = nullptr;
@@ -73,22 +83,29 @@
 
   friend class TypeServerSource;
 
-  // IPI merging is handled in TypeServerSource::mergeDebugT, since it depends
-  // directly on type merging.
+  // All of the TpiSource methods are no-ops. The parent TypeServerSource
+  // handles both TPI and IPI.
   Error mergeDebugT(TypeMerger *m) override { return Error::success(); }
-
+  void loadGHashes() override {}
+  void remapTpiWithGHashes(GHashState *g) override {}
   bool isDependency() const override { return true; }
 };
 
 // This class represents the debug type stream of an OBJ file that depends on a
 // PDB type server (see TypeServerSource).
 class UseTypeServerSource : public TpiSource {
+  Expected<TypeServerSource *> getTypeServerSource();
+
 public:
   UseTypeServerSource(ObjFile *f, TypeServer2Record ts)
       : TpiSource(UsingPDB, f), typeServerDependency(ts) {}
 
   Error mergeDebugT(TypeMerger *m) override;
 
+  // No need to load ghashes from /Zi objects.
+  void loadGHashes() override {}
+  void remapTpiWithGHashes(GHashState *g) override;
+
   // Information about the PDB type server dependency, that needs to be loaded
   // in before merging this OBJ.
   TypeServer2Record typeServerDependency;
@@ -110,6 +127,8 @@
             toString(it.first->second->file) + " and " + toString(file) + ")");
   }
 
+  void loadGHashes() override;
+
   bool isDependency() const override { return true; }
 
   static std::map<uint32_t, PrecompSource *> mappings;
@@ -124,21 +143,47 @@
 
   Error mergeDebugT(TypeMerger *m) override;
 
+  void loadGHashes() override;
+  void remapTpiWithGHashes(GHashState *g) override;
+
+private:
+  Error mergeInPrecompHeaderObj();
+
+public:
   // Information about the Precomp OBJ dependency, that needs to be loaded in
   // before merging this OBJ.
   PrecompRecord precompDependency;
 };
 } // namespace
 
-static std::vector<TpiSource *> gc;
+std::vector<TpiSource *> TpiSource::instances;
+ArrayRef<TpiSource *> TpiSource::dependencySources;
+ArrayRef<TpiSource *> TpiSource::objectSources;
 
-TpiSource::TpiSource(TpiKind k, ObjFile *f) : kind(k), file(f) {
-  gc.push_back(this);
+TpiSource::TpiSource(TpiKind k, ObjFile *f)
+    : kind(k), tpiSrcIdx(instances.size()), file(f) {
+  instances.push_back(this);
 }
 
 // Vtable key method.
 TpiSource::~TpiSource() = default;
 
+void TpiSource::sortDependencies() {
+  // Move dependency sources to the front while keeping the relative order
+  // inside each group, then renumber every source to match its new position.
+  auto firstObj = std::stable_partition(
+      instances.begin(), instances.end(),
+      [](TpiSource *src) { return src->isDependency(); });
+  size_t depCount = firstObj - instances.begin();
+  uint32_t nextIdx = 0;
+  for (TpiSource *src : instances)
+    src->tpiSrcIdx = nextIdx++;
+  // Publish views over the two halves of the now-ordered instance list.
+  ArrayRef<TpiSource *> all = instances;
+  dependencySources = all.take_front(depCount);
+  objectSources = all.drop_front(depCount);
+}
+
 TpiSource *lld::coff::makeTpiSource(ObjFile *file) {
   return make<TpiSource>(TpiSource::Regular, file);
 }
@@ -165,14 +210,68 @@
   return make<UsePrecompSource>(file, precomp);
 }
 
-void TpiSource::forEachSource(llvm::function_ref<void(TpiSource *)> fn) {
-  for_each(gc, fn);
-}
-
 std::map<codeview::GUID, TypeServerSource *> TypeServerSource::mappings;
 
 std::map<uint32_t, PrecompSource *> PrecompSource::mappings;
 
+bool TpiSource::remapTypeIndex(TypeIndex &ti, TiRefKind refKind) const {
+  // Simple indices are returned untouched; only non-simple ones are looked up.
+  if (!ti.isSimple()) {
+    const bool wantsItemMap = refKind == TiRefKind::IndexRef;
+    ArrayRef<TypeIndex> indexMap = wantsItemMap ? ipiMap : tpiMap;
+    const uint32_t slot = ti.toArrayIndex();
+    if (slot >= indexMap.size())
+      return false; // Out of range: ti keeps its original value.
+    ti = indexMap[slot];
+  }
+  return true;
+}
+
+// Remap, in place, every type index of rec that typeRefs points at.
+void TpiSource::remapRecord(MutableArrayRef<uint8_t> rec,
+                            ArrayRef<TiReference> typeRefs) {
+  MutableArrayRef<uint8_t> contents = rec.drop_front(sizeof(RecordPrefix));
+  for (const TiReference &ref : typeRefs) {
+    // Widen before multiplying so a corrupt Count cannot wrap the bounds check.
+    uint64_t byteSize = uint64_t(ref.Count) * sizeof(TypeIndex);
+    if (contents.size() < ref.Offset + byteSize)
+      fatal("symbol record too short");
+    MutableArrayRef<TypeIndex> indices(
+        reinterpret_cast<TypeIndex *>(contents.data() + ref.Offset), ref.Count);
+    for (TypeIndex &ti : indices) {
+      if (!remapTypeIndex(ti, ref.Kind)) {
+        if (config->verbose) {
+          uint16_t kind =
+              reinterpret_cast<const RecordPrefix *>(rec.data())->RecordKind;
+          StringRef fname = file ? file->getName() : "<unknown PDB>";
+          log("failed to remap type index in record of kind 0x" +
+              utohexstr(kind) + " in " + fname + " with bad " +
+              (ref.Kind == TiRefKind::IndexRef ? "item" : "type") +
+              " index 0x" + utohexstr(ti.getIndex()));
+        }
+        ti = TypeIndex(SimpleTypeKind::NotTranslated);
+      }
+    }
+  }
+}
+
+void TpiSource::remapTypesInTypeRecord(MutableArrayRef<uint8_t> rec) {
+  SmallVector<TiReference, 32> refs;
+  const CVType typeView(rec);
+  discoverTypeIndices(typeView, refs);
+  remapRecord(rec, refs); // TODO: Handle errors similar to symbols.
+}
+
+bool TpiSource::remapTypesInSymbolRecord(MutableArrayRef<uint8_t> rec) {
+  // Locate the type index references first; when discovery fails the record
+  // is left unmodified and the caller is told so.
+  SmallVector<TiReference, 32> refs;
+  const bool discovered = discoverTypeIndicesInSymbol(rec, refs);
+  if (discovered)
+    remapRecord(rec, refs);
+  return discovered;
+}
+
 // A COFF .debug$H section is currently a clang extension.  This function checks
 // if a .debug$H section is in a format that we expect / understand, so that we
 // can ignore any sections which are coincidentally also named .debug$H but do
@@ -203,7 +302,6 @@
 static ArrayRef<GloballyHashedType>
 getHashesFromDebugH(ArrayRef<uint8_t> debugH) {
   assert(canUseDebugH(debugH));
-
   debugH = debugH.drop_front(sizeof(object::debug_h_header));
   uint32_t count = debugH.size() / sizeof(GloballyHashedType);
   return {reinterpret_cast<const GloballyHashedType *>(debugH.data()), count};
@@ -211,32 +309,17 @@
 
 // Merge .debug$T for a generic object file.
 Error TpiSource::mergeDebugT(TypeMerger *m) {
+  assert(!config->debugGHashes &&
+         "use remapTpiWithGHashes when ghash is enabled");
+
   CVTypeArray types;
   BinaryStreamReader reader(file->debugTypes, support::little);
   cantFail(reader.readArray(types, reader.getLength()));
 
-  if (config->debugGHashes) {
-    ArrayRef<GloballyHashedType> hashes;
-    std::vector<GloballyHashedType> ownedHashes;
-    if (Optional<ArrayRef<uint8_t>> debugH = getDebugH(file))
-      hashes = getHashesFromDebugH(*debugH);
-    else {
-      ownedHashes = GloballyHashedType::hashTypes(types);
-      hashes = ownedHashes;
-    }
-
-    if (auto err = mergeTypeAndIdRecords(m->globalIDTable, m->globalTypeTable,
-                                         indexMapStorage, types, hashes,
-                                         file->pchSignature))
-      fatal("codeview::mergeTypeAndIdRecords failed: " +
-            toString(std::move(err)));
-  } else {
-    if (auto err =
-            mergeTypeAndIdRecords(m->idTable, m->typeTable, indexMapStorage,
-                                  types, file->pchSignature))
-      fatal("codeview::mergeTypeAndIdRecords failed: " +
-            toString(std::move(err)));
-  }
+  if (auto err = mergeTypeAndIdRecords(
+          m->idTable, m->typeTable, indexMapStorage, types, file->pchSignature))
+    fatal("codeview::mergeTypeAndIdRecords failed: " +
+          toString(std::move(err)));
 
   // In an object, there is only one mapping for both types and items.
   tpiMap = indexMapStorage;
@@ -267,6 +350,9 @@
 
 // Merge types from a type server PDB.
 Error TypeServerSource::mergeDebugT(TypeMerger *m) {
+  assert(!config->debugGHashes &&
+         "use remapTpiWithGHashes when ghash is enabled");
+
   pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
   Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
   if (auto e = expectedTpi.takeError())
@@ -279,45 +365,18 @@
     maybeIpi = &*expectedIpi;
   }
 
-  if (config->debugGHashes) {
-    // PDBs do not actually store global hashes, so when merging a type server
-    // PDB we have to synthesize global hashes.  To do this, we first synthesize
-    // global hashes for the TPI stream, since it is independent, then we
-    // synthesize hashes for the IPI stream, using the hashes for the TPI stream
-    // as inputs.
-    auto tpiHashes = GloballyHashedType::hashTypes(expectedTpi->typeArray());
-    Optional<uint32_t> endPrecomp;
-    // Merge TPI first, because the IPI stream will reference type indices.
-    if (auto err =
-            mergeTypeRecords(m->globalTypeTable, indexMapStorage,
-                             expectedTpi->typeArray(), tpiHashes, endPrecomp))
-      fatal("codeview::mergeTypeRecords failed: " + toString(std::move(err)));
-    tpiMap = indexMapStorage;
-
-    // Merge IPI.
-    if (maybeIpi) {
-      auto ipiHashes =
-          GloballyHashedType::hashIds(maybeIpi->typeArray(), tpiHashes);
-      if (auto err =
-              mergeIdRecords(m->globalIDTable, tpiMap, ipiSrc->indexMapStorage,
-                             maybeIpi->typeArray(), ipiHashes))
-        fatal("codeview::mergeIdRecords failed: " + toString(std::move(err)));
-      ipiMap = ipiSrc->indexMapStorage;
-    }
-  } else {
-    // Merge TPI first, because the IPI stream will reference type indices.
-    if (auto err = mergeTypeRecords(m->typeTable, indexMapStorage,
-                                    expectedTpi->typeArray()))
-      fatal("codeview::mergeTypeRecords failed: " + toString(std::move(err)));
-    tpiMap = indexMapStorage;
-
-    // Merge IPI.
-    if (maybeIpi) {
-      if (auto err = mergeIdRecords(m->idTable, tpiMap, ipiSrc->indexMapStorage,
-                                    maybeIpi->typeArray()))
-        fatal("codeview::mergeIdRecords failed: " + toString(std::move(err)));
-      ipiMap = ipiSrc->indexMapStorage;
-    }
+  // Merge TPI first, because the IPI stream will reference type indices.
+  if (auto err = mergeTypeRecords(m->typeTable, indexMapStorage,
+                                  expectedTpi->typeArray()))
+    fatal("codeview::mergeTypeRecords failed: " + toString(std::move(err)));
+  tpiMap = indexMapStorage;
+
+  // Merge IPI.
+  if (maybeIpi) {
+    if (auto err = mergeIdRecords(m->idTable, tpiMap, ipiSrc->indexMapStorage,
+                                  maybeIpi->typeArray()))
+      fatal("codeview::mergeIdRecords failed: " + toString(std::move(err)));
+    ipiMap = ipiSrc->indexMapStorage;
   }
 
   if (config->showSummary) {
@@ -337,7 +396,7 @@
   return Error::success();
 }
 
-Error UseTypeServerSource::mergeDebugT(TypeMerger *m) {
+Expected<TypeServerSource *> UseTypeServerSource::getTypeServerSource() {
   const codeview::GUID &tsId = typeServerDependency.getGuid();
   StringRef tsPath = typeServerDependency.getName();
 
@@ -357,8 +416,15 @@
 
     tsSrc = (TypeServerSource *)pdb->debugTypesObj;
   }
+  return tsSrc;
+}
+
+Error UseTypeServerSource::mergeDebugT(TypeMerger *m) {
+  Expected<TypeServerSource *> tsSrc = getTypeServerSource();
+  if (!tsSrc)
+    return tsSrc.takeError();
 
-  pdb::PDBFile &pdbSession = tsSrc->pdbInputFile->session->getPDBFile();
+  pdb::PDBFile &pdbSession = (*tsSrc)->pdbInputFile->session->getPDBFile();
   auto expectedInfo = pdbSession.getPDBInfoStream();
   if (!expectedInfo)
     return expectedInfo.takeError();
@@ -368,12 +434,12 @@
   // must match the GUID specified in the TypeServer2 record.
   if (expectedInfo->getGuid() != typeServerDependency.getGuid())
     return createFileError(
-        tsPath,
+        typeServerDependency.getName(),
         make_error<pdb::PDBError>(pdb::pdb_error_code::signature_out_of_date));
 
   // Reuse the type index map of the type server.
-  tpiMap = tsSrc->tpiMap;
-  ipiMap = tsSrc->ipiMap;
+  tpiMap = (*tsSrc)->tpiMap;
+  ipiMap = (*tsSrc)->ipiMap;
   return Error::success();
 }
 
@@ -399,26 +465,28 @@
   return nullptr;
 }
 
-static Expected<PrecompSource *> findPrecompMap(ObjFile *file,
-                                                PrecompRecord &pr) {
+static PrecompSource *findPrecompSource(ObjFile *file, PrecompRecord &pr) {
   // Cross-compile warning: given that Clang doesn't generate LF_PRECOMP
   // records, we assume the OBJ comes from a Windows build of cl.exe. Thusly,
   // the paths embedded in the OBJs are in the Windows format.
   SmallString<128> prFileName =
       sys::path::filename(pr.getPrecompFilePath(), sys::path::Style::windows);
 
-  PrecompSource *precomp;
   auto it = PrecompSource::mappings.find(pr.getSignature());
   if (it != PrecompSource::mappings.end()) {
-    precomp = it->second;
-  } else {
-    // Lookup by name
-    precomp = findObjByName(prFileName);
+    return it->second;
   }
+  // Lookup by name
+  return findObjByName(prFileName);
+}
+
+static Expected<PrecompSource *> findPrecompMap(ObjFile *file,
+                                                PrecompRecord &pr) {
+  PrecompSource *precomp = findPrecompSource(file, pr);
 
   if (!precomp)
     return createFileError(
-        prFileName,
+        pr.getPrecompFilePath(),
         make_error<pdb::PDBError>(pdb::pdb_error_code::no_matching_pch));
 
   if (pr.getSignature() != file->pchSignature)
@@ -437,11 +505,8 @@
 /// Merges a precompiled headers TPI map into the current TPI map. The
 /// precompiled headers object will also be loaded and remapped in the
 /// process.
-static Error
-mergeInPrecompHeaderObj(ObjFile *file,
-                        SmallVectorImpl<TypeIndex> &indexMapStorage,
-                        PrecompRecord &precomp) {
-  auto e = findPrecompMap(file, precomp);
+Error UsePrecompSource::mergeInPrecompHeaderObj() {
+  auto e = findPrecompMap(file, precompDependency);
   if (!e)
     return e.takeError();
 
@@ -449,11 +514,17 @@
   if (precompSrc->tpiMap.empty())
     return Error::success();
 
-  assert(precomp.getStartTypeIndex() == TypeIndex::FirstNonSimpleIndex);
-  assert(precomp.getTypesCount() <= precompSrc->tpiMap.size());
+  assert(precompDependency.getStartTypeIndex() ==
+         TypeIndex::FirstNonSimpleIndex);
+  assert(precompDependency.getTypesCount() <= precompSrc->tpiMap.size());
   // Use the previously remapped index map from the precompiled headers.
   indexMapStorage.append(precompSrc->tpiMap.begin(),
-                         precompSrc->tpiMap.begin() + precomp.getTypesCount());
+                         precompSrc->tpiMap.begin() +
+                             precompDependency.getTypesCount());
+
+  if (config->debugGHashes)
+    funcIdToType = precompSrc->funcIdToType; // FIXME: Save copy
+
   return Error::success();
 }
 
@@ -462,8 +533,7 @@
   // precompiled headers object (/Yc) first. Some type indices in the current
   // object are referencing data in the precompiled headers object, so we need
   // both to be loaded.
-  if (Error e =
-          mergeInPrecompHeaderObj(file, indexMapStorage, precompDependency))
+  if (Error e = mergeInPrecompHeaderObj())
     return e;
 
   return TpiSource::mergeDebugT(m);
@@ -478,7 +548,586 @@
 }
 
 void TpiSource::clear() {
-  gc.clear();
+  // Clean up any owned ghash allocations.
+  clearGHashes();
+  TpiSource::instances.clear();
   TypeServerSource::mappings.clear();
   PrecompSource::mappings.clear();
 }
+
+//===----------------------------------------------------------------------===//
+// Parallel GHash type merging implementation.
+//===----------------------------------------------------------------------===//
+
+void TpiSource::loadGHashes() {
+  // Prefer hashes stored in .debug$H: they are referenced in place, not owned.
+  if (Optional<ArrayRef<uint8_t>> hashSection = getDebugH(file)) {
+    ghashes = getHashesFromDebugH(*hashSection);
+    ownedGHashes = false;
+  } else {
+    CVTypeArray records;
+    BinaryStreamReader typeReader(file->debugTypes, support::little);
+    cantFail(typeReader.readArray(records, typeReader.getLength()));
+    assignGHashesFromVector(GloballyHashedType::hashTypes(records));
+  }
+  fillIsItemIndexFromDebugT();
+}
+
+// Copies ghashes into an exactly sized heap array owned by this source. They
+// are long lived, so dropping the vector's spare capacity is worth the copy.
+void TpiSource::assignGHashesFromVector(
+    std::vector<GloballyHashedType> &&hashVec) {
+  GloballyHashedType *hashes = new GloballyHashedType[hashVec.size()];
+  // std::copy is well defined for an empty vector, whose data() may be null.
+  std::copy(hashVec.begin(), hashVec.end(), hashes);
+  ghashes = makeArrayRef(hashes, hashVec.size());
+  ownedGHashes = true;
+}
+
+// Visits each type record in a flat byte buffer. This is faster than walking a
+// CVTypeArray because the inner loop makes no virtual readBytes calls.
+static void forEachTypeChecked(ArrayRef<uint8_t> types,
+                               function_ref<void(const CVType &)> fn) {
+  auto visitRecord = [fn](const CVType &record) -> Error {
+    fn(record);
+    return Error::success();
+  };
+  checkError(forEachCodeViewRecord<CVType>(types, visitRecord));
+}
+
+// Sets one bit in isItemIndex for every record of file->debugTypes that is an
+// id record; the bit position is the record's ordinal within the section.
+// TODO: Store this information in .debug$H so that we don't have to recompute
+// it. This is what slows one-thread parallel ghashing below single-threaded.
+void TpiSource::fillIsItemIndexFromDebugT() {
+  isItemIndex.resize(ghashes.size());
+  uint32_t nextRecord = 0;
+  forEachTypeChecked(file->debugTypes, [&](const CVType &record) {
+    const uint32_t recordNo = nextRecord++;
+    if (isIdRecord(record.kind()))
+      isItemIndex.set(recordNo);
+  });
+}
+
+// Appends a 4-byte aligned, index-remapped copy of ty to mergedIpi (id
+// records) or mergedTpi (all other records) and records its size and PDB hash.
+void TpiSource::mergeTypeRecord(CVType ty) {
+  bool isItem = isIdRecord(ty.kind());
+  MergedInfo &merged = isItem ? mergedIpi : mergedTpi;
+  // Copy the type into our mutable buffer. Only ty.length() bytes belong to
+  // the source record; any alignment tail is written as padding below.
+  assert(ty.length() <= codeview::MaxRecordLength);
+  size_t offset = merged.recs.size();
+  size_t newSize = alignTo(ty.length(), 4);
+  merged.recs.resize(offset + newSize);
+  auto newRec = makeMutableArrayRef(&merged.recs[offset], newSize);
+  memcpy(newRec.data(), ty.data().data(), ty.length());
+
+  // Fix up the record prefix and padding bytes if it required resizing.
+  if (newSize != ty.length()) {
+    reinterpret_cast<RecordPrefix *>(newRec.data())->RecordLen = newSize - 2;
+    for (size_t i = ty.length(); i < newSize; ++i)
+      newRec[i] = LF_PAD0 + (newSize - i);
+  }
+  // Remap the type indices in the new record.
+  remapTypesInTypeRecord(newRec);
+  uint32_t pdbHash = check(pdb::hashTypeRecord(CVType(newRec)));
+  merged.recSizes.push_back(static_cast<uint16_t>(newSize));
+  merged.recHashes.push_back(pdbHash);
+}
+
+void TpiSource::mergeUniqueTypeRecords(ArrayRef<uint8_t> typeRecords,
+                                       TypeIndex beginIndex) {
+  // Put uniqueTypes in index order (PDB sources are asserted to be sorted).
+  if (kind == PDB)
+    assert(std::is_sorted(uniqueTypes.begin(), uniqueTypes.end()));
+  else
+    llvm::sort(uniqueTypes);
+  // Walk typeRecords once. A record whose ordinal appears in uniqueTypes is
+  // copied into mergedTpi or mergedIpi by mergeTypeRecord.
+  uint32_t ghashIndex = 0;
+  auto nextUniqueIndex = uniqueTypes.begin();
+  assert(mergedTpi.recs.empty());
+  assert(mergedIpi.recs.empty());
+  forEachTypeChecked(typeRecords, [&](const CVType &ty) {
+    if (nextUniqueIndex != uniqueTypes.end() &&
+        *nextUniqueIndex == ghashIndex) {
+      mergeTypeRecord(ty);
+      ++nextUniqueIndex;
+    }
+    if (ty.kind() == LF_FUNC_ID || ty.kind() == LF_MFUNC_ID) {
+      bool success = ty.length() >= 12; // Must cover the index read at byte 8.
+      TypeIndex srcFuncIdIndex = beginIndex + ghashIndex;
+      TypeIndex funcId = srcFuncIdIndex; // Remapped in place below.
+      TypeIndex funcType;
+      if (success) {
+        funcType = *reinterpret_cast<const TypeIndex *>(&ty.data()[8]);
+        success &= remapTypeIndex(funcId, TiRefKind::IndexRef);
+        success &= remapTypeIndex(funcType, TiRefKind::TypeRef);
+      }
+      if (success) {
+        funcIdToType.insert({funcId, funcType});
+      } else {
+        StringRef fname = file ? file->getName() : "<unknown PDB>";
+        warn("corrupt LF_[M]FUNC_ID record 0x" +
+             utohexstr(srcFuncIdIndex.getIndex()) + " in " + fname);
+      }
+    }
+    ++ghashIndex;
+  });
+  assert(nextUniqueIndex == uniqueTypes.end() &&
+         "failed to merge all desired records");
+  assert(uniqueTypes.size() ==
+             mergedTpi.recSizes.size() + mergedIpi.recSizes.size() &&
+         "missing desired record");
+}
+
+void TpiSource::remapTpiWithGHashes(GHashState *g) {
+  assert(config->debugGHashes && "ghashes must be enabled");
+  fillMapFromGHashes(g, indexMapStorage);
+  // An object file has a single index space, so both maps share the storage.
+  tpiMap = ipiMap = indexMapStorage;
+  mergeUniqueTypeRecords(file->debugTypes);
+  // TODO: Free all unneeded ghash resources now that we have a full index map.
+}
+
+// PDBs do not actually store global hashes, so when merging a type server
+// PDB we have to synthesize global hashes.  To do this, we first synthesize
+// global hashes for the TPI stream, since it is independent, then we
+// synthesize hashes for the IPI stream, using the hashes for the TPI stream
+// as inputs.
+void TypeServerSource::loadGHashes() {
+  // Don't hash twice: a non-empty ghashes array means this already ran.
+  if (!ghashes.empty())
+    return;
+  pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
+
+  // Hash TPI stream. Its isItemIndex bits are all left clear by resize().
+  Expected<pdb::TpiStream &> expectedTpi = pdbFile.getPDBTpiStream();
+  if (auto e = expectedTpi.takeError())
+    fatal("Type server does not have TPI stream: " + toString(std::move(e)));
+  assignGHashesFromVector(
+      GloballyHashedType::hashTypes(expectedTpi->typeArray()));
+  isItemIndex.resize(ghashes.size());
+
+  // Hash IPI stream, which depends on TPI ghashes. The result is stored on
+  // the companion ipiSrc object, not on this source.
+  if (!pdbFile.hasPDBIpiStream())
+    return;
+  Expected<pdb::TpiStream &> expectedIpi = pdbFile.getPDBIpiStream();
+  if (auto e = expectedIpi.takeError())
+    fatal("error retrieving IPI stream: " + toString(std::move(e)));
+  ipiSrc->assignGHashesFromVector(
+      GloballyHashedType::hashIds(expectedIpi->typeArray(), ghashes));
+  // The IPI stream isItemIndex bitvector should be all ones.
+  ipiSrc->isItemIndex.resize(ipiSrc->ghashes.size());
+  ipiSrc->isItemIndex.set(0, ipiSrc->ghashes.size());
+}
+
+// Returns the bytes behind a (possibly discontiguous) PDB type array as one
+// flat buffer so that callers can use forEachTypeChecked. Copying all types
+// from type servers is faster than iterating all object files compiled with
+// /Z7 through CVTypeArray, which has high overheads due to the virtual
+// interface of BinaryStream::readBytes.
+static ArrayRef<uint8_t> typeArrayToBytes(const CVTypeArray &types) {
+  BinaryStreamRef backing = types.getUnderlyingStream();
+  ArrayRef<uint8_t> flat;
+  checkError(backing.readBytes(0, backing.getLength(), flat));
+  return flat;
+}
+
+// Merge types from a type server PDB using ghashes: TPI first, then IPI.
+void TypeServerSource::remapTpiWithGHashes(GHashState *g) {
+  assert(config->debugGHashes && "ghashes must be enabled");
+
+  // IPI merging depends on TPI, so do TPI first, then do IPI.  No need to
+  // propagate errors, those should've been handled during ghash loading.
+  pdb::PDBFile &pdbFile = pdbInputFile->session->getPDBFile();
+  pdb::TpiStream &tpi = check(pdbFile.getPDBTpiStream());
+  fillMapFromGHashes(g, indexMapStorage);
+  tpiMap = indexMapStorage;
+  mergeUniqueTypeRecords(typeArrayToBytes(tpi.typeArray()));
+  if (pdbFile.hasPDBIpiStream()) {
+    pdb::TpiStream &ipi = check(pdbFile.getPDBIpiStream());
+    ipiSrc->indexMapStorage.resize(ipiSrc->ghashes.size());
+    ipiSrc->fillMapFromGHashes(g, ipiSrc->indexMapStorage);
+    ipiMap = ipiSrc->indexMapStorage; // The item map lives in ipiSrc's storage.
+    ipiSrc->tpiMap = tpiMap; // ipiSrc remaps its records through both maps.
+    ipiSrc->ipiMap = ipiMap;
+    ipiSrc->mergeUniqueTypeRecords(typeArrayToBytes(ipi.typeArray()));
+    funcIdToType = ipiSrc->funcIdToType; // FIXME: Save copy
+  }
+}
+
+void UseTypeServerSource::remapTpiWithGHashes(GHashState *g) {
+  // No remapping to do with /Zi objects: they simply adopt the index maps of
+  // their type server. Errors should have been reported earlier. Symbols from
+  // this object will be ignored.
+  if (Expected<TypeServerSource *> server = getTypeServerSource()) {
+    const TypeServerSource &from = **server;
+    tpiMap = from.tpiMap;
+    ipiMap = from.ipiMap;
+    funcIdToType = from.funcIdToType; // FIXME: Save copy
+  } else {
+    // Record the failure in typeMergingError instead of aborting.
+    typeMergingError = server.takeError();
+  }
+}
+
+void PrecompSource::loadGHashes() {
+  // Hashes are always recomputed for PCH objects; a .debug$H section, if
+  // present, is reported and then disregarded.
+  if (getDebugH(file))
+    warn("ignoring .debug$H section; pch with ghash is not implemented");
+
+  std::vector<GloballyHashedType> hashVec;
+  auto hashOne = [&](const CVType &ty) {
+    // Remember the index of the LF_ENDPRECOMP record so it can be excluded from
+    // the PDB. It still gets a ghash entry so that the type indexes of the
+    // following records in the /Yc PCH object line up.
+    if (ty.kind() == LF_ENDPRECOMP)
+      endPrecompGHashIdx = static_cast<uint32_t>(hashVec.size());
+
+    hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec));
+    isItemIndex.push_back(isIdRecord(ty.kind()));
+  };
+  forEachTypeChecked(file->debugTypes, hashOne);
+  assignGHashesFromVector(std::move(hashVec));
+}
+
+void UsePrecompSource::loadGHashes() {
+  PrecompSource *pchSrc = findPrecompSource(file, precompDependency);
+  if (!pchSrc)
+    return;
+
+  // To compute ghashes of a /Yu object file, we need to build on the
+  // ghashes of the /Yc PCH object. After we are done hashing, discard the
+  // ghashes from the PCH source so we don't unnecessarily try to deduplicate
+  // them.
+  std::vector<GloballyHashedType> hashVec =
+      pchSrc->ghashes.take_front(precompDependency.getTypesCount());
+  forEachTypeChecked(file->debugTypes, [&](const CVType &ty) {
+    hashVec.push_back(GloballyHashedType::hashType(ty, hashVec, hashVec));
+    isItemIndex.push_back(isIdRecord(ty.kind()));
+  });
+  hashVec.erase(hashVec.begin(),
+                hashVec.begin() + precompDependency.getTypesCount());
+  assignGHashesFromVector(std::move(hashVec));
+}
+
+void UsePrecompSource::remapTpiWithGHashes(GHashState *g) {
+  // This object was compiled with /Yu, so process the corresponding
+  // precompiled headers object (/Yc) first. Some type indices in the current
+  // object are referencing data in the precompiled headers object, so we need
+  // both to be loaded. On failure, save the error and skip remapping.
+  if (Error e = mergeInPrecompHeaderObj()) {
+    typeMergingError = std::move(e);
+    return;
+  }
+
+  fillMapFromGHashes(g, indexMapStorage);
+  tpiMap = indexMapStorage;
+  ipiMap = indexMapStorage; // types and items share one index map here
+  mergeUniqueTypeRecords(file->debugTypes,
+                         TypeIndex(precompDependency.getStartTypeIndex() +
+                                   precompDependency.getTypesCount()));
+}
+
+namespace {
+/// A concurrent hash table for global type hashing. It is based on this paper:
+/// Concurrent Hash Tables: Fast and General(?)!
+/// https://dl.acm.org/doi/10.1145/3309206
+///
+/// This hash table is meant to be used in two phases:
+/// 1. concurrent insertions
+/// 2. concurrent reads
+/// It does not support lookup, deletion, or rehashing. It uses linear probing.
+///
+/// The paper describes storing a key-value pair in two machine words.
+/// Generally, the values stored in this map are type indices, and we can use
+/// those values to recover the ghash key from a side table. This allows us to
+/// shrink the table entries further at the cost of some loads, and sidesteps
+/// the need for a 128-bit atomic compare-and-swap operation.
+///
+/// During insertion, a priority function is used to decide which insertion
+/// should be preferred. This ensures that the output is deterministic. For
+/// ghashing, lower tpiSrcIdx values (earlier inputs) are preferred.
+///
+class GHashCell;
+struct GHashTable {
+  GHashCell *table = nullptr; // cell array; allocated by init()
+  uint32_t tableSize = 0;     // number of cells in table
+
+  GHashTable() = default;
+  ~GHashTable();
+
+  /// Initialize the table with the given size. Because the table cannot be
+  /// resized, the initial size of the table must be large enough to contain all
+  /// inputs, or insertion may not be able to find an empty cell.
+  void init(uint32_t newTableSize);
+
+  /// Insert the cell with the given ghash into the table. Return the insertion
+  /// position in the table. It is safe for the caller to store the insertion
+  /// position because the table cannot be resized.
+  uint32_t insert(GloballyHashedType ghash, GHashCell newCell);
+};
+
+/// A ghash table cell for deduplicating types from TpiSources.
+class GHashCell {
+  uint64_t data = 0; // isItem:1 | (tpiSrcIdx + 1):31 | ghashIdx:32
+
+public:
+  GHashCell() = default;
+
+  // Construct data most to least significant so that sorting works well:
+  // - isItem
+  // - tpiSrcIdx
+  // - ghashIdx
+  // Add one to the tpiSrcIdx so that the 0th record from the 0th source has a
+  // non-zero representation.
+  GHashCell(bool isItem, uint32_t tpiSrcIdx, uint32_t ghashIdx)
+      : data((uint64_t(isItem) << 63U) | (uint64_t(tpiSrcIdx + 1) << 32ULL) |
+             ghashIdx) {
+    assert(tpiSrcIdx == getTpiSrcIdx() && "round trip failure");
+    assert(ghashIdx == getGHashIdx() && "round trip failure");
+  }
+
+  explicit GHashCell(uint64_t data) : data(data) {}
+
+  // The empty cell is all zeros.
+  bool isEmpty() const { return data == 0ULL; }
+
+  /// Extract the tpiSrcIdx, undoing the +1 bias applied by the constructor.
+  uint32_t getTpiSrcIdx() const {
+    return ((uint32_t)(data >> 32U) & 0x7FFFFFFF) - 1;
+  }
+
+  /// Extract the index into the ghash array of the TpiSource.
+  uint32_t getGHashIdx() const { return (uint32_t)data; }
+
+  bool isItem() const { return data & (1ULL << 63U); }
+
+  /// Get the ghash key for this cell.
+  GloballyHashedType getGHash() const {
+    return TpiSource::instances[getTpiSrcIdx()]->ghashes[getGHashIdx()];
+  }
+
+  /// The priority function for the cell. The data is stored such that lower
+  /// tpiSrcIdx and ghashIdx values are preferred, which means that type records
+  /// from earlier sources are more likely to prevail.
+  friend inline bool operator<(const GHashCell &l, const GHashCell &r) {
+    return l.data < r.data;
+  }
+};
+} // namespace
+
+namespace lld {
+namespace coff {
+/// This type is just a wrapper around GHashTable with external linkage so it
+/// can be forward declared in and used from a header.
+struct GHashState {
+  GHashTable table; // the ghash deduplication table
+};
+} // namespace coff
+} // namespace lld
+
+GHashTable::~GHashTable() { delete[] table; } // frees init()'s allocation
+
+void GHashTable::init(uint32_t newTableSize) {
+  table = new GHashCell[newTableSize];
+  memset(table, 0, newTableSize * sizeof(GHashCell)); // zero == empty cell
+  tableSize = newTableSize;
+}
+
+uint32_t GHashTable::insert(GloballyHashedType ghash, GHashCell newCell) {
+  assert(!newCell.isEmpty() && "cannot insert empty cell value");
+
+  // FIXME: The low bytes of SHA1 have low entropy for short records, which
+  // type records are. Swap the byte order for better entropy. A better ghash
+  // won't need this.
+  uint32_t startIdx =
+      ByteSwap_64(*reinterpret_cast<uint64_t *>(&ghash)) % tableSize;
+
+  // Do a linear probe starting at startIdx.
+  uint32_t idx = startIdx;
+  while (true) {
+    // Run a compare and swap loop. There are four cases:
+    // - cell is empty: CAS into place and return
+    // - cell has matching key, earlier priority: do nothing, return
+    // - cell has matching key, later priority: CAS into place and return
+    // - cell has non-matching key: hash collision, probe next cell
+    auto *cellPtr = reinterpret_cast<std::atomic<GHashCell> *>(&table[idx]);
+    GHashCell oldCell(cellPtr->load());
+    while (oldCell.isEmpty() || oldCell.getGHash() == ghash) {
+      // Check if there is an existing ghash entry with a higher priority
+      // (earlier ordering). If so, this is a duplicate, we are done.
+      if (!oldCell.isEmpty() && oldCell < newCell)
+        return idx;
+      // Either the cell is empty, or our value is higher priority. Try to
+      // compare and swap. If it succeeds, we are done.
+      if (cellPtr->compare_exchange_weak(oldCell, newCell))
+        return idx;
+      // The CAS failed and reloaded oldCell; check this cell again.
+    }
+
+    // Advance the probe. Wrap around to the beginning if we run off the end.
+    ++idx;
+    idx = idx == tableSize ? 0 : idx;
+    if (idx == startIdx) {
+      // If this becomes an issue, we could mark failure and rehash from the
+      // beginning with a bigger table. There is no difference between rehashing
+      // internally and starting over.
+      report_fatal_error("ghash table is full");
+    }
+  }
+  llvm_unreachable("left infloop");
+}
+
+TypeMerger::TypeMerger(llvm::BumpPtrAllocator &alloc)
+    : typeTable(alloc), idTable(alloc) {} // both tables are given alloc
+
+TypeMerger::~TypeMerger() = default; // defined out of line
+
+void TypeMerger::mergeTypesWithGHash() {
+  // Load ghashes. Do type servers and PCH objects first.
+  {
+    ScopedTimer t1(loadGHashTimer);
+    parallelForEach(TpiSource::dependencySources,
+                    [&](TpiSource *source) { source->loadGHashes(); });
+    parallelForEach(TpiSource::objectSources,
+                    [&](TpiSource *source) { source->loadGHashes(); });
+  }
+
+  ScopedTimer t2(mergeGHashTimer);
+  GHashState ghashState;
+
+  // Estimate the size of hash table needed to deduplicate ghashes. This *must*
+  // be larger than the number of unique types, or hash table insertion may not
+  // be able to find a vacant slot. Summing the input types guarantees this, but
+  // it is a gross overestimate. The table size could be reduced to save memory,
+  // but it would require implementing rehashing, and this table is generally
+  // small compared to total memory usage, at eight bytes per input type record,
+  // and most input type records are larger than eight bytes.
+  size_t tableSize = 0;
+  for (TpiSource *source : TpiSource::instances)
+    tableSize += source->ghashes.size();
+
+  // Cap the table size so that we can use 32-bit cell indices. Type indices are
+  // also 32-bit, so this is an inherent PDB file format limit anyway.
+  tableSize = std::min(size_t(INT32_MAX), tableSize);
+  ghashState.table.init(static_cast<uint32_t>(tableSize));
+
+  // Insert ghashes in parallel. During concurrent insertion, we cannot observe
+  // the contents of the hash table cell, but we can remember the insertion
+  // position. Because the table does not rehash, the position will not change
+  // under insertion. After insertion is done, the value of the cell can be read
+  // to retrieve the final PDB type index.
+  parallelForEachN(0, TpiSource::instances.size(), [&](size_t tpiSrcIdx) {
+    TpiSource *source = TpiSource::instances[tpiSrcIdx];
+    source->indexMapStorage.resize(source->ghashes.size());
+    for (uint32_t i = 0, e = source->ghashes.size(); i < e; i++) {
+      if (source->shouldOmitFromPdb(i)) {
+        source->indexMapStorage[i] = TypeIndex(SimpleTypeKind::NotTranslated);
+        continue;
+      }
+      GloballyHashedType ghash = source->ghashes[i];
+      bool isItem = source->isItemIndex.test(i);
+      uint32_t cellIdx =
+          ghashState.table.insert(ghash, GHashCell(isItem, tpiSrcIdx, i));
+
+      // Store the ghash cell index as a type index in indexMapStorage. Later
+      // we will replace it with the PDB type index.
+      source->indexMapStorage[i] = TypeIndex::fromArrayIndex(cellIdx);
+    }
+  });
+
+  // Collect all non-empty cells and sort them. This will implicitly assign
+  // destination type indices, and partition the entries into type records and
+  // item records. It arranges types in this order:
+  // - type records
+  //   - source 0, type 0...
+  //   - source 1, type 1...
+  // - item records
+  //   - source 0, type 1...
+  //   - source 1, type 0...
+  std::vector<GHashCell> entries;
+  for (const GHashCell &cell :
+       makeArrayRef(ghashState.table.table, tableSize)) {
+    if (!cell.isEmpty())
+      entries.push_back(cell);
+  }
+  parallelSort(entries, std::less<GHashCell>());
+  log(formatv("ghash table load factor: {0:p} (size {1} / capacity {2})\n",
+              double(entries.size()) / tableSize, entries.size(), tableSize));
+
+  // Find out how many type and item indices there are.
+  auto mid =
+      std::lower_bound(entries.begin(), entries.end(), GHashCell(true, 0, 0));
+  assert((mid == entries.end() || mid->isItem()) &&
+         (mid == entries.begin() || !std::prev(mid)->isItem()) &&
+         "midpoint is not midpoint");
+  uint32_t numTypes = std::distance(entries.begin(), mid);
+  uint32_t numItems = std::distance(mid, entries.end());
+  log("Tpi record count: " + Twine(numTypes));
+  log("Ipi record count: " + Twine(numItems));
+
+  // Make a list of the "unique" type records to merge for each tpi source. Type
+  // merging will skip indices not on this list. Store the destination PDB type
+  // index for these unique types in the tpiMap for each source. The entries for
+  // non-unique types will be filled in prior to type merging.
+  for (uint32_t i = 0, e = entries.size(); i < e; ++i) {
+    auto &cell = entries[i];
+    uint32_t tpiSrcIdx = cell.getTpiSrcIdx();
+    TpiSource *source = TpiSource::instances[tpiSrcIdx];
+    source->uniqueTypes.push_back(cell.getGHashIdx());
+
+    // Update the ghash table to store the destination PDB type index in the
+    // table. Item indices restart from zero after the type records.
+    uint32_t pdbTypeIndex = i < numTypes ? i : i - numTypes;
+    uint32_t ghashCellIndex =
+        source->indexMapStorage[cell.getGHashIdx()].toArrayIndex();
+    ghashState.table.table[ghashCellIndex] =
+        GHashCell(cell.isItem(), cell.getTpiSrcIdx(), pdbTypeIndex);
+  }
+
+  // Remap all types: dependencies serially first, then objects in parallel.
+  for_each(TpiSource::dependencySources, [&](TpiSource *source) {
+    source->remapTpiWithGHashes(&ghashState);
+  });
+  parallelForEach(TpiSource::objectSources, [&](TpiSource *source) {
+    source->remapTpiWithGHashes(&ghashState);
+  });
+
+  TpiSource::clearGHashes();
+}
+
+/// Given the ghash table cell index for a particular type, return its type
+/// index in the output PDB, as stored in the cell's ghash index field.
+static TypeIndex loadPdbTypeIndexFromCell(GHashState *g,
+                                          uint32_t ghashCellIdx) {
+  GHashCell cell = g->table.table[ghashCellIdx];
+  return TypeIndex::fromArrayIndex(cell.getGHashIdx());
+}
+
+// Fill in a TPI or IPI index map using ghashes. For each source type, use its
+// ghash cell to look up its final PDB type index, and store that in the map.
+void TpiSource::fillMapFromGHashes(GHashState *g,
+                                   SmallVectorImpl<TypeIndex> &mapToFill) {
+  for (size_t i = 0, e = ghashes.size(); i < e; ++i) {
+    TypeIndex fakeCellIndex = indexMapStorage[i];
+    if (fakeCellIndex.isSimple()) // not a cell index; copy it through as is
+      mapToFill[i] = fakeCellIndex;
+    else
+      mapToFill[i] = loadPdbTypeIndexFromCell(g, fakeCellIndex.toArrayIndex());
+  }
+}
+
+void TpiSource::clearGHashes() {
+  for (TpiSource *src : TpiSource::instances) {
+    if (src->ownedGHashes) // only delete arrays flagged as owned
+      delete[] src->ghashes.data();
+    src->ghashes = {};
+    src->isItemIndex.clear();
+    src->uniqueTypes.clear();
+  }
+}
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -69,13 +69,13 @@
   lld::stderrOS = &stderrOS;
 
   errorHandler().cleanupCallback = []() {
+    TpiSource::clear();
     freeArena();
     ObjFile::instances.clear();
     PDBInputFile::instances.clear();
     ImportFile::instances.clear();
     BitcodeFile::instances.clear();
     memset(MergeChunk::instances, 0, sizeof(MergeChunk::instances));
-    TpiSource::clear();
     OutputSection::clear();
   };
 
diff --git a/lld/COFF/PDB.h b/lld/COFF/PDB.h
--- a/lld/COFF/PDB.h
+++ b/lld/COFF/PDB.h
@@ -20,6 +20,8 @@
 }
 
 namespace lld {
+class Timer;
+
 namespace coff {
 class OutputSection;
 class SectionChunk;
@@ -32,6 +34,10 @@
 
 llvm::Optional<std::pair<llvm::StringRef, uint32_t>>
 getFileLineCodeView(const SectionChunk *c, uint32_t addr);
+
+extern Timer loadGHashTimer;
+extern Timer mergeGHashTimer;
+
 } // namespace coff
 } // namespace lld
 
diff --git a/lld/COFF/PDB.cpp b/lld/COFF/PDB.cpp
--- a/lld/COFF/PDB.cpp
+++ b/lld/COFF/PDB.cpp
@@ -66,7 +66,8 @@
 static ExitOnError exitOnErr;
 
 static Timer totalPdbLinkTimer("PDB Emission (Cumulative)", Timer::root());
-
+Timer lld::coff::loadGHashTimer("Global Type Hashing", totalPdbLinkTimer);
+Timer lld::coff::mergeGHashTimer("GHash Type Merging", totalPdbLinkTimer);
 static Timer addObjectsTimer("Add Objects", totalPdbLinkTimer);
 static Timer typeMergingTimer("Type Merging", addObjectsTimer);
 static Timer symbolMergingTimer("Symbol Merging", addObjectsTimer);
@@ -112,8 +113,6 @@
   /// externally.
   void addDebug(TpiSource *source);
 
-  bool mergeTypeRecords(TpiSource *source);
-
   void addDebugSymbols(TpiSource *source);
 
   void mergeSymbolRecords(TpiSource *source,
@@ -250,43 +249,18 @@
   });
 }
 
-static bool remapTypeIndex(TypeIndex &ti, ArrayRef<TypeIndex> typeIndexMap) {
-  if (ti.isSimple())
-    return true;
-  if (ti.toArrayIndex() >= typeIndexMap.size())
-    return false;
-  ti = typeIndexMap[ti.toArrayIndex()];
-  return true;
-}
-
-static void remapTypesInSymbolRecord(ObjFile *file, SymbolKind symKind,
-                                     MutableArrayRef<uint8_t> recordBytes,
-                                     TpiSource *source,
-                                     ArrayRef<TiReference> typeRefs) {
-  MutableArrayRef<uint8_t> contents =
-      recordBytes.drop_front(sizeof(RecordPrefix));
-  for (const TiReference &ref : typeRefs) {
-    unsigned byteSize = ref.Count * sizeof(TypeIndex);
-    if (contents.size() < ref.Offset + byteSize)
-      fatal("symbol record too short");
-
-    // This can be an item index or a type index. Choose the appropriate map.
-    bool isItemIndex = ref.Kind == TiRefKind::IndexRef;
-    ArrayRef<TypeIndex> typeOrItemMap =
-        isItemIndex ? source->ipiMap : source->tpiMap;
-
-    MutableArrayRef<TypeIndex> tIs(
-        reinterpret_cast<TypeIndex *>(contents.data() + ref.Offset), ref.Count);
-    for (TypeIndex &ti : tIs) {
-      if (!remapTypeIndex(ti, typeOrItemMap)) {
-        log("ignoring symbol record of kind 0x" + utohexstr(symKind) + " in " +
-            file->getName() + " with bad " + (isItemIndex ? "item" : "type") +
-            " index 0x" + utohexstr(ti.getIndex()));
-        ti = TypeIndex(SimpleTypeKind::NotTranslated);
-        continue;
-      }
-    }
-  }
+static void addGHashTypeInfo(pdb::PDBFileBuilder &builder) {
+  // Start the TPI and IPI stream headers.
+  builder.getTpiBuilder().setVersionHeader(pdb::PdbTpiV80);
+  builder.getIpiBuilder().setVersionHeader(pdb::PdbTpiV80);
+  for_each(TpiSource::instances, [&](TpiSource *source) {
+    builder.getTpiBuilder().addTypeRecords(source->mergedTpi.recs,
+                                           source->mergedTpi.recSizes,
+                                           source->mergedTpi.recHashes);
+    builder.getIpiBuilder().addTypeRecords(source->mergedIpi.recs,
+                                           source->mergedIpi.recSizes,
+                                           source->mergedIpi.recHashes);
+  });
 }
 
 static void
@@ -329,7 +303,7 @@
 
 /// MSVC translates S_PROC_ID_END to S_END, and S_[LG]PROC32_ID to S_[LG]PROC32
 static void translateIdSymbols(MutableArrayRef<uint8_t> &recordData,
-                               TypeCollection &idTable) {
+                               TypeMerger &tMerger, TpiSource *source) {
   RecordPrefix *prefix = reinterpret_cast<RecordPrefix *>(recordData.data());
 
   SymbolKind kind = symbolKind(recordData);
@@ -356,13 +330,25 @@
         reinterpret_cast<TypeIndex *>(content.data() + refs[0].Offset);
     // `ti` is the index of a FuncIdRecord or MemberFuncIdRecord which lives in
     // the IPI stream, whose `FunctionType` member refers to the TPI stream.
-    // Note that LF_FUNC_ID and LF_MEMFUNC_ID have the same record layout, and
+    // Note that LF_FUNC_ID and LF_MFUNC_ID have the same record layout, and
     // in both cases we just need the second type index.
     if (!ti->isSimple() && !ti->isNoneType()) {
-      CVType funcIdData = idTable.getType(*ti);
-      ArrayRef<uint8_t> tiBuf = funcIdData.data().slice(8, 4);
-      assert(tiBuf.size() == 4 && "corrupt LF_[MEM]FUNC_ID record");
-      *ti = *reinterpret_cast<const TypeIndex *>(tiBuf.data());
+      if (config->debugGHashes) {
+        auto idToType = source->funcIdToType.find(*ti);
+        if (idToType == source->funcIdToType.end()) {
+          warn(formatv("S_[GL]PROC32_ID record in {0} refers to PDB item "
+                       "index {1:X} which is not a LF_[M]FUNC_ID record",
+                       source->file->getName(), ti->getIndex()));
+          *ti = TypeIndex(SimpleTypeKind::NotTranslated);
+        } else {
+          *ti = idToType->second;
+        }
+      } else {
+        CVType funcIdData = tMerger.getIDTable().getType(*ti);
+        ArrayRef<uint8_t> tiBuf = funcIdData.data().slice(8, 4);
+        assert(tiBuf.size() == 4 && "corrupt LF_[M]FUNC_ID record");
+        *ti = *reinterpret_cast<const TypeIndex *>(tiBuf.data());
+      }
     }
 
     kind = (kind == SymbolKind::S_GPROC32_ID) ? SymbolKind::S_GPROC32
@@ -561,22 +547,16 @@
               const_cast<uint8_t *>(sym.data().data()), sym.length());
         }
 
-        // Discover type index references in the record. Skip it if we don't
-        // know where they are.
-        SmallVector<TiReference, 32> typeRefs;
-        if (!discoverTypeIndicesInSymbol(sym, typeRefs)) {
-          log("ignoring unknown symbol record with kind 0x" +
-              utohexstr(sym.kind()));
+        // Re-map all the type index references.
+        if (!source->remapTypesInSymbolRecord(recordBytes)) {
+          log("error remapping types in symbol of kind 0x" +
+              utohexstr(sym.kind()) + ", ignoring");
           return Error::success();
         }
 
-        // Re-map all the type index references.
-        remapTypesInSymbolRecord(file, sym.kind(), recordBytes, source,
-                                 typeRefs);
-
         // An object file may have S_xxx_ID symbols, but these get converted to
         // "real" symbols in a PDB.
-        translateIdSymbols(recordBytes, tMerger.getIDTable());
+        translateIdSymbols(recordBytes, tMerger, source);
         sym = CVSymbol(recordBytes);
 
         // If this record refers to an offset in the object file's string table,
@@ -748,11 +728,15 @@
     const DebugSubsectionRecord &inlineeSubsection) {
   DebugInlineeLinesSubsectionRef inlineeLines;
   exitOnErr(inlineeLines.initialize(inlineeSubsection.getRecordData()));
+  if (!source) {
+    warn("ignoring inlinee lines section in file that lacks type information");
+    return;
+  }
 
   // Remap type indices in inlinee line records in place.
   for (const InlineeSourceLine &line : inlineeLines) {
     TypeIndex &inlinee = *const_cast<TypeIndex *>(&line.Header->Inlinee);
-    if (!remapTypeIndex(inlinee, source->ipiMap)) {
+    if (!source->remapTypeIndex(inlinee, TiRefKind::IndexRef)) {
       log("bad inlinee line record in " + file.getName() +
           " with bad inlinee index 0x" + utohexstr(inlinee.getIndex()));
     }
@@ -827,20 +811,6 @@
     warn(msg);
 }
 
-bool PDBLinker::mergeTypeRecords(TpiSource *source) {
-  ScopedTimer t(typeMergingTimer);
-  // Before we can process symbol substreams from .debug$S, we need to process
-  // type information, file checksums, and the string table.  Add type info to
-  // the PDB first, so that we can get the map from object file type and item
-  // indices to PDB type and item indices.
-  if (Error e = source->mergeDebugT(&tMerger)) {
-    // If the .debug$T sections fail to merge, assume there is no debug info.
-    warnUnusable(source->file, std::move(e));
-    return false;
-  }
-  return true;
-}
-
 // Allocate memory for a .debug$S / .debug$F section and relocate it.
 static ArrayRef<uint8_t> relocateDebugChunk(SectionChunk &debugChunk) {
   uint8_t *buffer = bAlloc.Allocate<uint8_t>(debugChunk.getSize());
@@ -920,9 +890,27 @@
 }
 
 void PDBLinker::addDebug(TpiSource *source) {
-  // If type merging failed, ignore the symbols.
-  if (mergeTypeRecords(source))
-    addDebugSymbols(source);
+  // Before we can process symbol substreams from .debug$S, we need to process
+  // type information, file checksums, and the string table. Add type info to
+  // the PDB first, so that we can get the map from object file type and item
+  // indices to PDB type and item indices.  If we are using ghashes, types have
+  // already been merged.
+  if (!config->debugGHashes) {
+    ScopedTimer t(typeMergingTimer);
+    if (Error e = source->mergeDebugT(&tMerger)) {
+      // If type merging failed, ignore the symbols.
+      warnUnusable(source->file, std::move(e));
+      return;
+    }
+  } else {
+    // If type merging failed, ignore the symbols.
+    if (source->typeMergingError) {
+      warnUnusable(source->file, std::move(source->typeMergingError));
+      return;
+    }
+  }
+
+  addDebugSymbols(source);
 }
 
 static pdb::BulkPublic createPublic(Defined *def) {
@@ -955,25 +943,31 @@
   for_each(ObjFile::instances,
            [&](ObjFile *obj) { createModuleDBI(builder, obj); });
 
-  // Merge dependencies
-  TpiSource::forEachSource([&](TpiSource *source) {
-    if (source->isDependency())
-      addDebug(source);
-  });
+  // Reorder dependency type sources to come first.
+  TpiSource::sortDependencies();
 
-  // Merge regular and dependent OBJs
-  TpiSource::forEachSource([&](TpiSource *source) {
-    if (!source->isDependency())
-      addDebug(source);
-  });
+  // Merge type information from input files using global type hashing.
+  if (config->debugGHashes)
+    tMerger.mergeTypesWithGHash();
+
+  // Merge dependencies and then regular objects.
+  for_each(TpiSource::dependencySources,
+           [&](TpiSource *source) { addDebug(source); });
+  for_each(TpiSource::objectSources,
+           [&](TpiSource *source) { addDebug(source); });
 
   builder.getStringTableBuilder().setStrings(pdbStrTab);
   t1.stop();
 
   // Construct TPI and IPI stream contents.
   ScopedTimer t2(tpiStreamLayoutTimer);
-  addTypeInfo(builder.getTpiBuilder(), tMerger.getTypeTable());
-  addTypeInfo(builder.getIpiBuilder(), tMerger.getIDTable());
+  // Collect all the merged types.
+  if (config->debugGHashes) {
+    addGHashTypeInfo(builder);
+  } else {
+    addTypeInfo(builder.getTpiBuilder(), tMerger.getTypeTable());
+    addTypeInfo(builder.getIpiBuilder(), tMerger.getIDTable());
+  }
   t2.stop();
 }
 
@@ -1014,8 +1008,8 @@
         "Input OBJ files (expanded from all cmd-line inputs)");
   print(TpiSource::countTypeServerPDBs(), "PDB type server dependencies");
   print(TpiSource::countPrecompObjs(), "Precomp OBJ dependencies");
-  print(tMerger.getTypeTable().size() + tMerger.getIDTable().size(),
-        "Merged TPI records");
+  print(builder.getTpiBuilder().getRecordCount(), "Merged TPI records");
+  print(builder.getIpiBuilder().getRecordCount(), "Merged IPI records");
   print(pdbStrTab.size(), "Output PDB strings");
   print(globalSymbols, "Global symbol records");
   print(moduleSymbols, "Module symbol records");
@@ -1067,8 +1061,11 @@
     }
   };
 
-  printLargeInputTypeRecs("TPI", tMerger.tpiCounts, tMerger.getTypeTable());
-  printLargeInputTypeRecs("IPI", tMerger.ipiCounts, tMerger.getIDTable());
+  if (!config->debugGHashes) {
+    // FIXME: Reimplement for ghash.
+    printLargeInputTypeRecs("TPI", tMerger.tpiCounts, tMerger.getTypeTable());
+    printLargeInputTypeRecs("IPI", tMerger.ipiCounts, tMerger.getIDTable());
+  }
 
   message(buffer);
 }
diff --git a/lld/COFF/TypeMerger.h b/lld/COFF/TypeMerger.h
--- a/lld/COFF/TypeMerger.h
+++ b/lld/COFF/TypeMerger.h
@@ -10,45 +10,47 @@
 #define LLD_COFF_TYPEMERGER_H
 
 #include "Config.h"
-#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h"
 #include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h"
+#include "llvm/DebugInfo/CodeView/TypeHashing.h"
 #include "llvm/Support/Allocator.h"
+#include <atomic>
 
 namespace lld {
 namespace coff {
 
+using llvm::codeview::GloballyHashedType;
+using llvm::codeview::TypeIndex;
+
+struct GHashState;
+
 class TypeMerger {
 public:
-  TypeMerger(llvm::BumpPtrAllocator &alloc)
-      : typeTable(alloc), idTable(alloc), globalTypeTable(alloc),
-        globalIDTable(alloc) {}
+  TypeMerger(llvm::BumpPtrAllocator &alloc);
+
+  ~TypeMerger();
 
   /// Get the type table or the global type table if /DEBUG:GHASH is enabled.
   inline llvm::codeview::TypeCollection &getTypeTable() {
-    if (config->debugGHashes)
-      return globalTypeTable;
+    assert(!config->debugGHashes);
     return typeTable;
   }
 
   /// Get the ID table or the global ID table if /DEBUG:GHASH is enabled.
   inline llvm::codeview::TypeCollection &getIDTable() {
-    if (config->debugGHashes)
-      return globalIDTable;
+    assert(!config->debugGHashes);
     return idTable;
   }
 
+  /// Use global hashes to eliminate duplicate types and identify unique type
+  /// indices in each TpiSource.
+  void mergeTypesWithGHash();
+
   /// Type records that will go into the PDB TPI stream.
   llvm::codeview::MergingTypeTableBuilder typeTable;
 
   /// Item records that will go into the PDB IPI stream.
   llvm::codeview::MergingTypeTableBuilder idTable;
 
-  /// Type records that will go into the PDB TPI stream (for /DEBUG:GHASH)
-  llvm::codeview::GlobalTypeTableBuilder globalTypeTable;
-
-  /// Item records that will go into the PDB IPI stream (for /DEBUG:GHASH)
-  llvm::codeview::GlobalTypeTableBuilder globalIDTable;
-
   // When showSummary is enabled, these are histograms of TPI and IPI records
   // keyed by type index.
   SmallVector<uint32_t, 0> tpiCounts;
diff --git a/lld/include/lld/Common/ErrorHandler.h b/lld/include/lld/Common/ErrorHandler.h
--- a/lld/include/lld/Common/ErrorHandler.h
+++ b/lld/include/lld/Common/ErrorHandler.h
@@ -153,6 +153,13 @@
   return std::move(*e);
 }
 
+// Don't move from Expected wrappers around references; return the reference.
+template <class T> T &check(Expected<T &> e) {
+  if (!e)
+    fatal(llvm::toString(e.takeError()));
+  return *e;
+}
+
 template <class T>
 T check2(ErrorOr<T> e, llvm::function_ref<std::string()> prefix) {
   if (auto ec = e.getError())
diff --git a/lld/test/COFF/pdb-global-hashes.test b/lld/test/COFF/pdb-global-hashes.test
--- a/lld/test/COFF/pdb-global-hashes.test
+++ b/lld/test/COFF/pdb-global-hashes.test
@@ -2,7 +2,7 @@
 RUN: yaml2obj %p/Inputs/pdb-hashes-2.yaml -o %t.2.obj
 RUN: yaml2obj %p/Inputs/pdb-hashes-2-missing.yaml -o %t.2.missing.obj
 RUN: lld-link /debug %t.1.obj %t.2.obj /entry:main /nodefaultlib /PDB:%t.nohash.pdb
-RUN: lld-link /debug:ghash %t.1.obj %t.2.obj /entry:main /nodefaultlib /PDB:%t.hash.pdb
+RUN: lld-link /debug:ghash -verbose %t.1.obj %t.2.obj /entry:main /nodefaultlib /PDB:%t.hash.pdb
 RUN: lld-link /debug:ghash %t.1.obj %t.2.missing.obj /entry:main /nodefaultlib /PDB:%t.mixed.pdb
 RUN: llvm-pdbutil dump -types -ids -dont-resolve-forward-refs %t.nohash.pdb | FileCheck %s
 RUN: llvm-pdbutil dump -types -ids -dont-resolve-forward-refs %t.hash.pdb | FileCheck %s
diff --git a/lld/test/COFF/pdb-procid-remapping.test b/lld/test/COFF/pdb-procid-remapping.test
--- a/lld/test/COFF/pdb-procid-remapping.test
+++ b/lld/test/COFF/pdb-procid-remapping.test
@@ -1,8 +1,12 @@
-# RUN: yaml2obj %p/Inputs/pdb1.yaml -o %t1.obj
-# RUN: yaml2obj %p/Inputs/pdb2.yaml -o %t2.obj
+# RUN: yaml2obj < %p/Inputs/pdb1.yaml > %t1.obj
+# RUN: yaml2obj < %p/Inputs/pdb2.yaml > %t2.obj
+
 # RUN: lld-link /debug /pdb:%t.pdb /dll /out:%t.dll /entry:main /nodefaultlib \
 # RUN:   %t1.obj %t2.obj
+# RUN: llvm-pdbutil dump -symbols %t.pdb | FileCheck %s
 
+# RUN: lld-link /debug /debug:ghash /pdb:%t.pdb /dll /out:%t.dll /entry:main /nodefaultlib \
+# RUN:   %t1.obj %t2.obj
 # RUN: llvm-pdbutil dump -symbols %t.pdb | FileCheck %s
 
 CHECK:                                Symbols
diff --git a/lld/test/COFF/pdb-type-server-missing.yaml b/lld/test/COFF/pdb-type-server-missing.yaml
--- a/lld/test/COFF/pdb-type-server-missing.yaml
+++ b/lld/test/COFF/pdb-type-server-missing.yaml
@@ -5,6 +5,7 @@
 # RUN: yaml2obj %s -o %t1.obj
 # RUN: yaml2obj %p/Inputs/pdb-type-server-missing-2.yaml -o %t2.obj
 # RUN: lld-link %t1.obj %t2.obj -out:%t.exe -debug -pdb:%t.pdb -nodefaultlib -entry:main 2>&1 | FileCheck %s -check-prefix=WARN
+# RUN: lld-link %t1.obj %t2.obj -out:%t.exe -debug:ghash -pdb:%t.pdb -nodefaultlib -entry:main 2>&1 | FileCheck %s -check-prefix=WARN
 # RUN: lld-link %t1.obj %t2.obj -out:%t.exe -debug -pdb:%t.pdb -nodefaultlib -entry:main /ignore:4099 2>&1 | FileCheck %s -check-prefix=IGNORE -allow-empty
 # RUN: not lld-link %t1.obj %t2.obj -out:%t.exe -debug -pdb:%t.pdb -nodefaultlib -entry:main /WX 2>&1 | FileCheck %s -check-prefix=ERR
 # RUN: lld-link %t1.obj %t2.obj -out:%t.exe -debug -pdb:%t.pdb -nodefaultlib -entry:main /ignore:4099 /WX 2>&1 | FileCheck %s -check-prefix=IGNORE-ERR -allow-empty
diff --git a/lld/test/COFF/pdb-type-server-simple.test b/lld/test/COFF/pdb-type-server-simple.test
--- a/lld/test/COFF/pdb-type-server-simple.test
+++ b/lld/test/COFF/pdb-type-server-simple.test
@@ -20,7 +20,11 @@
 RUN: yaml2obj %S/Inputs/pdb-type-server-simple-a.yaml -o a.obj
 RUN: yaml2obj %S/Inputs/pdb-type-server-simple-b.yaml -o b.obj
 RUN: llvm-pdbutil yaml2pdb %S/Inputs/pdb-type-server-simple-ts.yaml -pdb ts.pdb
-RUN: lld-link a.obj b.obj -entry:main -debug -out:t.exe -pdb:t.pdb -nodefaultlib /summary | FileCheck %s -check-prefix SUMMARY
+RUN: lld-link a.obj b.obj -entry:main -debug -out:t.exe -pdb:t.pdb -nodefaultlib -summary | FileCheck %s -check-prefix SUMMARY
+RUN: llvm-pdbutil dump -symbols -types -ids -globals %t/t.pdb | FileCheck %s
+
+Re-run with /DEBUG:GHASH
+RUN: lld-link a.obj b.obj -entry:main -debug:ghash -out:t.exe -pdb:t.pdb -nodefaultlib -summary -verbose
 RUN: llvm-pdbutil dump -symbols -types -ids -globals %t/t.pdb | FileCheck %s
 
 
@@ -101,7 +105,8 @@
 SUMMARY-NEXT:               2 Input OBJ files (expanded from all cmd-line inputs)
 SUMMARY-NEXT:               1 PDB type server dependencies
 SUMMARY-NEXT:               0 Precomp OBJ dependencies
-SUMMARY-NEXT:              25 Merged TPI records
+SUMMARY-NEXT:               9 Merged TPI records
+SUMMARY-NEXT:              16 Merged IPI records
 SUMMARY-NEXT:               3 Output PDB strings
 SUMMARY-NEXT:               4 Global symbol records
 SUMMARY-NEXT:              14 Module symbol records
diff --git a/lld/test/COFF/precomp-link.test b/lld/test/COFF/precomp-link.test
--- a/lld/test/COFF/precomp-link.test
+++ b/lld/test/COFF/precomp-link.test
@@ -5,6 +5,7 @@
 RUN: llvm-pdbutil dump -types %t.pdb | FileCheck %s
 
 RUN: lld-link %S/Inputs/precomp-a.obj %S/Inputs/precomp-invalid.obj %S/Inputs/precomp.obj /nodefaultlib /entry:main /debug /pdb:%t.pdb /out:%t.exe /opt:ref /opt:icf 2>&1 | FileCheck %s -check-prefix FAILURE
+RUN: lld-link %S/Inputs/precomp-a.obj %S/Inputs/precomp-invalid.obj %S/Inputs/precomp.obj /nodefaultlib /entry:main /debug:ghash /pdb:%t.pdb /out:%t.exe /opt:ref /opt:icf 2>&1 | FileCheck %s -check-prefix FAILURE
 
 FIXME: The following RUN line should fail, regardless of whether debug info is
 enabled or not. Normally this would result in an error due to missing _PchSym_
@@ -52,12 +53,19 @@
 CHECK-NOT: LF_ENDPRECOMP
 
 
+Re-run with ghash. Eventually, perhaps this will be the default.
+
+RUN: lld-link %S/Inputs/precomp-a.obj %S/Inputs/precomp-b.obj %S/Inputs/precomp.obj /nodefaultlib /entry:main /debug /debug:ghash /pdb:%t.pdb /out:%t.exe /opt:ref /opt:icf /summary | FileCheck %s -check-prefix SUMMARY
+RUN: llvm-pdbutil dump -types %t.pdb | FileCheck %s
+
+
 SUMMARY:                                     Summary
 SUMMARY-NEXT: --------------------------------------------------------------------------------
 SUMMARY-NEXT:               3 Input OBJ files (expanded from all cmd-line inputs)
 SUMMARY-NEXT:               0 PDB type server dependencies
 SUMMARY-NEXT:               1 Precomp OBJ dependencies
-SUMMARY-NEXT:            1044 Merged TPI records
+SUMMARY-NEXT:             874 Merged TPI records
+SUMMARY-NEXT:             170 Merged IPI records
 SUMMARY-NEXT:               5 Output PDB strings
 SUMMARY-NEXT:             167 Global symbol records
 SUMMARY-NEXT:              20 Module symbol records
diff --git a/lld/test/COFF/s_udt.s b/lld/test/COFF/s_udt.s
--- a/lld/test/COFF/s_udt.s
+++ b/lld/test/COFF/s_udt.s
@@ -2,6 +2,8 @@
 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-windows-msvc < %s > %t.obj
 # RUN: lld-link /DEBUG:FULL /nodefaultlib /entry:main %t.obj /PDB:%t.pdb /OUT:%t.exe
 # RUN: llvm-pdbutil dump -types -globals -symbols -modi=0 %t.pdb | FileCheck %s
+# RUN: lld-link /DEBUG:FULL /debug:ghash /nodefaultlib /entry:main %t.obj /PDB:%t.pdb /OUT:%t.exe
+# RUN: llvm-pdbutil dump -types -globals -symbols -modi=0 %t.pdb | FileCheck %s
 
 # CHECK:                               Types (TPI Stream)
 # CHECK-NEXT: ============================================================
diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h b/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h
--- a/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h
@@ -86,6 +86,16 @@
 
   bool empty() const { return *(const uint64_t*)Hash.data() == 0; }
 
+  friend bool operator==(const GloballyHashedType &L,
+                         const GloballyHashedType &R) {
+    return L.Hash == R.Hash; // Equality is defined by the Hash member alone.
+  }
+
+  friend bool operator!=(const GloballyHashedType &L,
+                         const GloballyHashedType &R) {
+    return !(L == R); // Defer to operator== so the two cannot diverge.
+  }
+
   /// Given a sequence of bytes representing a record, compute a global hash for
   /// this record.  Due to the nature of global hashes incorporating the hashes
   /// of referenced records, this function requires a list of types and ids
@@ -206,7 +216,7 @@
 
   static bool isEqual(codeview::GloballyHashedType LHS,
                       codeview::GloballyHashedType RHS) {
-    return LHS.Hash == RHS.Hash;
+    return LHS == RHS;
   }
 };
 
diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h b/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h
--- a/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h
@@ -116,13 +116,28 @@
 
+  /// Convert a non-simple index to a zero-based array index. Any
+  /// DecoratedItemIdMask bits are cleared first, so a decorated index maps to
+  /// the same slot as its undecorated form.
   uint32_t toArrayIndex() const {
     assert(!isSimple());
-    return getIndex() - FirstNonSimpleIndex;
+    return (getIndex() & ~DecoratedItemIdMask) - FirstNonSimpleIndex;
   }
 
   static TypeIndex fromArrayIndex(uint32_t Index) {
     return TypeIndex(Index + FirstNonSimpleIndex);
   }
 
+  /// Build a TypeIndex from a zero-based array index, OR-ing in
+  /// DecoratedItemIdMask when \p IsItem is true.
+  static TypeIndex fromDecoratedArrayIndex(bool IsItem, uint32_t Index) {
+    return TypeIndex((Index + FirstNonSimpleIndex) |
+                     (IsItem ? DecoratedItemIdMask : 0));
+  }
+
+  /// Return a copy of this index with the DecoratedItemIdMask bits cleared.
+  TypeIndex removeDecoration() const {
+    return TypeIndex(Index & ~DecoratedItemIdMask);
+  }
+
   SimpleTypeKind getSimpleKind() const {
     assert(isSimple());
     return static_cast<SimpleTypeKind>(Index & SimpleKindMask);
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h b/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h
--- a/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h
@@ -54,16 +54,25 @@
 
   void setVersionHeader(PdbRaw_TpiVer Version);
   void addTypeRecord(ArrayRef<uint8_t> Type, Optional<uint32_t> Hash);
+  /// Add a buffer of back-to-back type records. \p Sizes holds the byte length
+  /// of each record and must sum to Types.size(); \p Hashes holds one hash per
+  /// record.
+  void addTypeRecords(ArrayRef<uint8_t> Types, ArrayRef<uint16_t> Sizes,
+                      ArrayRef<uint32_t> Hashes);
 
   Error finalizeMsfLayout();
 
-  uint32_t getRecordCount() const { return TypeRecords.size(); }
+  uint32_t getRecordCount() const { return TypeRecordCount; }
 
   Error commit(const msf::MSFLayout &Layout, WritableBinaryStreamRef Buffer);
 
   uint32_t calculateSerializedLength();
 
 private:
+  /// Advance the record count and byte total by the given record sizes, adding
+  /// a type index offset for the first record and at each 8KB crossing.
+  void updateTypeIndexOffsets(ArrayRef<uint16_t> Sizes);
+
   uint32_t calculateHashBufferSize() const;
   uint32_t calculateIndexOffsetSize() const;
   Error finalize();
@@ -71,10 +80,13 @@
   msf::MSFBuilder &Msf;
   BumpPtrAllocator &Allocator;
 
+  // Number of type records added so far, across all buffers.
+  uint32_t TypeRecordCount = 0;
   size_t TypeRecordBytes = 0;
 
   PdbRaw_TpiVer VerHeader = PdbRaw_TpiVer::PdbTpiV80;
-  std::vector<ArrayRef<uint8_t>> TypeRecords;
+  // Record buffers in insertion order; one buffer may hold several records.
+  std::vector<ArrayRef<uint8_t>> TypeRecBuffers;
   std::vector<uint32_t> TypeHashes;
   std::vector<codeview::TypeIndexOffset> TypeIndexOffsets;
   uint32_t HashStreamIndex = kInvalidStreamIndex;
diff --git a/llvm/lib/DebugInfo/CodeView/RecordName.cpp b/llvm/lib/DebugInfo/CodeView/RecordName.cpp
--- a/llvm/lib/DebugInfo/CodeView/RecordName.cpp
+++ b/llvm/lib/DebugInfo/CodeView/RecordName.cpp
@@ -9,6 +9,7 @@
 #include "llvm/DebugInfo/CodeView/RecordName.h"
 
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/DebugInfo/CodeView/CVSymbolVisitor.h"
 #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
 #include "llvm/DebugInfo/CodeView/SymbolRecordMapping.h"
@@ -77,9 +78,10 @@
   uint32_t Size = Indices.size();
   Name = "(";
   for (uint32_t I = 0; I < Size; ++I) {
-    assert(Indices[I] < CurrentTypeIndex);
-
-    Name.append(Types.getTypeName(Indices[I]));
+    if (Indices[I] < CurrentTypeIndex)
+      Name.append(Types.getTypeName(Indices[I]));
+    else
+      Name.append("<unknown 0x" + utohexstr(Indices[I].getIndex()) + ">");
     if (I + 1 != Size)
       Name.append(", ");
   }
diff --git a/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
--- a/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
@@ -25,6 +25,7 @@
 #include "llvm/Support/Error.h"
 #include <algorithm>
 #include <cstdint>
+#include <numeric>
 
 using namespace llvm;
 using namespace llvm::msf;
@@ -41,39 +42,68 @@
   VerHeader = Version;
 }
 
+void TpiStreamBuilder::updateTypeIndexOffsets(ArrayRef<uint16_t> Sizes) {
+  // Add an entry for the first record and whenever we cross an 8KB boundary.
+  constexpr size_t EightKB = 8 * 1024;
+  for (uint16_t RecSize : Sizes) {
+    size_t EndOffset = TypeRecordBytes + RecSize;
+    bool Crossed8KB = EndOffset / EightKB > TypeRecordBytes / EightKB;
+    if (TypeRecordCount == 0 || Crossed8KB)
+      TypeIndexOffsets.push_back(
+          {codeview::TypeIndex(codeview::TypeIndex::FirstNonSimpleIndex +
+                               TypeRecordCount),
+           ulittle32_t(TypeRecordBytes)});
+    ++TypeRecordCount;
+    TypeRecordBytes = EndOffset;
+  }
+}
+
 void TpiStreamBuilder::addTypeRecord(ArrayRef<uint8_t> Record,
                                      Optional<uint32_t> Hash) {
-  // If we just crossed an 8KB threshold, add a type index offset.
   assert(((Record.size() & 3) == 0) &&
          "The type record's size is not a multiple of 4 bytes which will "
          "cause misalignment in the output TPI stream!");
-  size_t NewSize = TypeRecordBytes + Record.size();
-  constexpr size_t EightKB = 8 * 1024;
-  if (NewSize / EightKB > TypeRecordBytes / EightKB || TypeRecords.empty()) {
-    TypeIndexOffsets.push_back(
-        {codeview::TypeIndex(codeview::TypeIndex::FirstNonSimpleIndex +
-                             TypeRecords.size()),
-         ulittle32_t(TypeRecordBytes)});
-  }
-  TypeRecordBytes = NewSize;
+  assert(Record.size() <= codeview::MaxRecordLength && "record too large");
+  uint16_t OneSize = static_cast<uint16_t>(Record.size()); // checked above
+  updateTypeIndexOffsets(makeArrayRef(&OneSize, 1));
 
-  TypeRecords.push_back(Record);
+  TypeRecBuffers.push_back(Record);
+  // FIXME: Require a hash for every record instead of making it optional.
   if (Hash)
     TypeHashes.push_back(*Hash);
 }
 
+void TpiStreamBuilder::addTypeRecords(ArrayRef<uint8_t> Types,
+                                      ArrayRef<uint16_t> Sizes,
+                                      ArrayRef<uint32_t> Hashes) {
+  // An empty buffer contributes nothing and must not carry sizes or hashes.
+  if (Types.empty()) {
+    assert(Sizes.empty() && Hashes.empty());
+    return;
+  }
+
+  assert(Types.size() % 4 == 0 &&
+         "The type record's size is not a multiple of 4 bytes which will "
+         "cause misalignment in the output TPI stream!");
+  assert(Hashes.size() == Sizes.size() && "sizes and hashes should be in sync");
+  assert(Types.size() == std::accumulate(Sizes.begin(), Sizes.end(), 0U) &&
+         "sizes of type records should sum to the size of the types");
+  updateTypeIndexOffsets(Sizes);
+
+  TypeRecBuffers.push_back(Types);
+  TypeHashes.insert(TypeHashes.end(), Hashes.begin(), Hashes.end());
+}
+
 Error TpiStreamBuilder::finalize() {
   if (Header)
     return Error::success();
 
   TpiStreamHeader *H = Allocator.Allocate<TpiStreamHeader>();
 
-  uint32_t Count = TypeRecords.size();
-
   H->Version = VerHeader;
   H->HeaderSize = sizeof(TpiStreamHeader);
   H->TypeIndexBegin = codeview::TypeIndex::FirstNonSimpleIndex;
-  H->TypeIndexEnd = H->TypeIndexBegin + Count;
+  H->TypeIndexEnd = H->TypeIndexBegin + TypeRecordCount;
   H->TypeRecordBytes = TypeRecordBytes;
 
   H->HashStreamIndex = HashStreamIndex;
@@ -104,7 +134,7 @@
 }
 
 uint32_t TpiStreamBuilder::calculateHashBufferSize() const {
-  assert((TypeRecords.size() == TypeHashes.size() || TypeHashes.empty()) &&
+  assert((TypeRecordCount == TypeHashes.size() || TypeHashes.empty()) &&
          "either all or no type records should have hashes");
   return TypeHashes.size() * sizeof(ulittle32_t);
 }
@@ -155,7 +185,7 @@
   if (auto EC = Writer.writeObject(*Header))
     return EC;
 
-  for (auto Rec : TypeRecords) {
+  for (auto Rec : TypeRecBuffers) {
     assert(!Rec.empty() && "Attempting to write an empty type record shifts "
                            "all offsets in the TPI stream!");
     assert(((Rec.size() & 3) == 0) &&