Index: llvm/trunk/include/llvm/IR/ModuleSummaryIndex.h
===================================================================
--- llvm/trunk/include/llvm/IR/ModuleSummaryIndex.h
+++ llvm/trunk/include/llvm/IR/ModuleSummaryIndex.h
@@ -632,6 +632,8 @@
   /// Return the list of <CalleeValueInfo, CalleeInfo> pairs.
   ArrayRef<EdgeTy> calls() const { return CallGraphEdgeList; }
 
+  void addCall(EdgeTy E) { CallGraphEdgeList.push_back(E); } // Append edge E.
+
   /// Returns the list of type identifiers used by this function in
   /// llvm.type.test intrinsics other than by an llvm.assume intrinsic,
   /// represented as GUIDs.
@@ -1293,6 +1295,12 @@
     return nullptr;
   }
 
+  TypeIdSummary *getTypeIdSummary(StringRef TypeId) { // Non-const overload.
+    return const_cast<TypeIdSummary *>(
+        static_cast<const ModuleSummaryIndex *>(this)->getTypeIdSummary(
+            TypeId)); // Forwards to the const overload and drops constness.
+  }
+
   const std::map<std::string, TypeIdCompatibleVtableInfo> &
   typeIdCompatibleVtableMap() const {
     return TypeIdCompatibleVtableMap;
Index: llvm/trunk/include/llvm/Transforms/IPO/WholeProgramDevirt.h
===================================================================
--- llvm/trunk/include/llvm/Transforms/IPO/WholeProgramDevirt.h
+++ llvm/trunk/include/llvm/Transforms/IPO/WholeProgramDevirt.h
@@ -16,8 +16,10 @@
 
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/Transforms/IPO/FunctionImport.h"
 #include <cassert>
 #include <cstdint>
+#include <set>
 #include <utility>
 #include <vector>
 
@@ -28,6 +30,7 @@
 class Function;
 class GlobalVariable;
 class ModuleSummaryIndex;
+struct ValueInfo;
 
 namespace wholeprogramdevirt {
 
@@ -228,6 +231,29 @@
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
 };
 
+struct VTableSlotSummary { // Identifies one vtable slot of a type id.
+  StringRef TypeID; // Name of the type identifier.
+  uint64_t ByteOffset; // Slot offset relative to the vtable address point.
+};
+
+/// Perform index-based whole program devirtualization on the \p Summary
+/// index. Any devirtualized targets used by a type test in another module
+/// are added to the \p ExportedGUIDs set. For any local devirtualized targets
+/// only used within the defining module, the information necessary for
+/// locating the corresponding WPD resolution is recorded for the ValueInfo
+/// in case it is exported by cross module importing (in which case the
+/// devirtualized target name will need adjustment).
+void runWholeProgramDevirtOnIndex(
+    ModuleSummaryIndex &Summary, std::set<GlobalValue::GUID> &ExportedGUIDs,
+    std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap);
+
+/// Call after cross-module importing to update the recorded single impl
+/// devirt target names for any locals that were exported.
+void updateIndexWPDForExports(
+    ModuleSummaryIndex &Summary,
+    StringMap<FunctionImporter::ExportSetTy> &ExportLists,
+    std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap);
+
 } // end namespace llvm
 
 #endif // LLVM_TRANSFORMS_IPO_WHOLEPROGRAMDEVIRT_H
Index: llvm/trunk/lib/LTO/LTO.cpp
===================================================================
--- llvm/trunk/lib/LTO/LTO.cpp
+++ llvm/trunk/lib/LTO/LTO.cpp
@@ -44,6 +44,7 @@
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
 #include "llvm/Transforms/Utils/SplitModule.h"
 
@@ -1274,15 +1275,28 @@
   if (DumpThinCGSCCs)
     ThinLTO.CombinedIndex.dumpSCCs(outs());
 
+  std::set<GlobalValue::GUID> ExportedGUIDs;
+
+  // Perform index-based WPD. This will return immediately if there are
+  // no index entries in the typeIdCompatibleVtableMap (e.g. if we are instead
+  // performing IR-based WPD in hybrid regular/thin LTO mode).
+  std::map<ValueInfo, std::vector<VTableSlotSummary>> LocalWPDTargetsMap;
+  runWholeProgramDevirtOnIndex(ThinLTO.CombinedIndex, ExportedGUIDs,
+                               LocalWPDTargetsMap);
+
   if (Conf.OptLevel > 0)
     ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
                              ImportLists, ExportLists);
 
+  // Update local devirtualized targets that were exported by cross-module
+  // importing.
+  updateIndexWPDForExports(ThinLTO.CombinedIndex, ExportLists,
+                           LocalWPDTargetsMap);
+
   // Figure out which symbols need to be internalized. This also needs to happen
   // at -O0 because summary-based DCE is implemented using internalization, and
   // we must apply DCE consistently with the full LTO module in order to avoid
   // undefined references during the final link.
-  std::set<GlobalValue::GUID> ExportedGUIDs;
   for (auto &Res : GlobalResolutions) {
     // If the symbol does not have external references or it is not prevailing,
     // then not need to mark it as exported from a ThinLTO partition.
Index: llvm/trunk/lib/Transforms/IPO/WholeProgramDevirt.cpp
===================================================================
--- llvm/trunk/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ llvm/trunk/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -24,12 +24,14 @@
 //   returns 0, or a single vtable's function returns 1, replace each virtual
 //   call with a comparison of the vptr against that vtable's address.
 //
-// This pass is intended to be used during the regular and thin LTO pipelines.
+// This pass is intended to be used during the regular and thin LTO pipelines:
+//
 // During regular LTO, the pass determines the best optimization for each
 // virtual call and applies the resolutions directly to virtual calls that are
 // eligible for virtual call optimization (i.e. calls that use either of the
-// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics). During
-// ThinLTO, the pass operates in two phases:
+// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics).
+//
+// During hybrid Regular/ThinLTO, the pass operates in two phases:
 // - Export phase: this is run during the thin link over a single merged module
 //   that contains all vtables with !type metadata that participate in the link.
 //   The pass computes a resolution for each virtual call and stores it in the
@@ -38,6 +40,14 @@
 //   modules. The pass applies the resolutions previously computed during the
 //   import phase to each eligible virtual call.
 //
+// During ThinLTO, the pass operates in two phases:
+// - Export phase: this is run during the thin link over the index which
+//   contains a summary of all vtables with !type metadata that participate in
+//   the link. It computes a resolution for each virtual call and stores it in
+//   the type identifier summary. Only single implementation devirtualization
+//   is supported.
+// - Import phase: (same as with hybrid case above).
+//
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/IPO/WholeProgramDevirt.h"
@@ -117,6 +127,11 @@
                 cl::desc("Maximum number of call targets per "
                          "call site to enable branch funnels"));
 
+static cl::opt<bool>
+    PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden,
+                       cl::init(false), cl::ZeroOrMore,
+                       cl::desc("Print index-based devirtualization messages"));
+
 // Find the minimum offset that we may store a value of size Size bits at. If
 // IsAfter is set, look for an offset before the object, otherwise look for an
 // offset after the object.
@@ -265,6 +280,25 @@
   }
 };
 
+template <> struct DenseMapInfo<VTableSlotSummary> { // Map key traits.
+  static VTableSlotSummary getEmptyKey() { // Pairs the fields' empty keys.
+    return {DenseMapInfo<StringRef>::getEmptyKey(),
+            DenseMapInfo<uint64_t>::getEmptyKey()};
+  }
+  static VTableSlotSummary getTombstoneKey() { // Pairs the fields' tombstones.
+    return {DenseMapInfo<StringRef>::getTombstoneKey(),
+            DenseMapInfo<uint64_t>::getTombstoneKey()};
+  }
+  static unsigned getHashValue(const VTableSlotSummary &I) { // XOR of fields.
+    return DenseMapInfo<StringRef>::getHashValue(I.TypeID) ^
+           DenseMapInfo<uint64_t>::getHashValue(I.ByteOffset);
+  }
+  static bool isEqual(const VTableSlotSummary &LHS,
+                      const VTableSlotSummary &RHS) { // Field-wise equality.
+    return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset;
+  }
+};
+
 } // end namespace llvm
 
 namespace {
@@ -342,6 +376,7 @@
   /// pass the vector is non-empty, we will need to add a use of llvm.type.test
   /// to each of the function summaries in the vector.
   std::vector<FunctionSummary *> SummaryTypeCheckedLoadUsers;
+  std::vector<FunctionSummary *> SummaryTypeTestAssumeUsers;
 
   bool isExported() const {
     return SummaryHasTypeTestAssumeUsers ||
@@ -358,6 +393,11 @@
     AllCallSitesDevirted = false;
   }
 
+  void addSummaryTypeTestAssumeUser(FunctionSummary *FS) { // Record user FS.
+    SummaryTypeTestAssumeUsers.push_back(FS); // Later given a call edge.
+    markSummaryHasTypeTestAssumeUsers();
+  }
+
   void markDevirt() {
     AllCallSitesDevirted = true;
 
@@ -542,6 +582,38 @@
                 function_ref<DominatorTree &(Function &)> LookupDomTree);
 };
 
+struct DevirtIndex { // Runs whole program devirtualization on the index.
+  ModuleSummaryIndex &ExportSummary; // Index that run() reads and updates.
+  // The set in which to record GUIDs exported from their module by
+  // devirtualization, used by client to ensure they are not internalized.
+  std::set<GlobalValue::GUID> &ExportedGUIDs;
+  // A map in which to record the information necessary to locate the WPD
+  // resolution for local targets in case they are exported by cross module
+  // importing.
+  std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap;
+
+  MapVector<VTableSlotSummary, VTableSlotInfo> CallSlots; // Built by run().
+
+  DevirtIndex(
+      ModuleSummaryIndex &ExportSummary,
+      std::set<GlobalValue::GUID> &ExportedGUIDs,
+      std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap)
+      : ExportSummary(ExportSummary), ExportedGUIDs(ExportedGUIDs),
+        LocalWPDTargetsMap(LocalWPDTargetsMap) {}
+
+  bool tryFindVirtualCallTargets(std::vector<ValueInfo> &TargetsForSlot,
+                                 const TypeIdCompatibleVtableInfo TIdInfo,
+                                 uint64_t ByteOffset); // False if none found.
+
+  bool trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
+                           VTableSlotSummary &SlotSummary,
+                           VTableSlotInfo &SlotInfo,
+                           WholeProgramDevirtResolution *Res,
+                           std::set<ValueInfo> &DevirtTargets);
+
+  void run(); // Collects call slots, then tries single-impl devirt on each.
+};
+
 struct WholeProgramDevirt : public ModulePass {
   static char ID;
 
@@ -632,6 +704,43 @@
   return PreservedAnalyses::none();
 }
 
+namespace llvm {
+void runWholeProgramDevirtOnIndex(
+    ModuleSummaryIndex &Summary, std::set<GlobalValue::GUID> &ExportedGUIDs,
+    std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
+  DevirtIndex(Summary, ExportedGUIDs, LocalWPDTargetsMap).run();
+}
+
+void updateIndexWPDForExports(
+    ModuleSummaryIndex &Summary,
+    StringMap<FunctionImporter::ExportSetTy> &ExportLists,
+    std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
+  for (auto &T : LocalWPDTargetsMap) {
+    auto &VI = T.first; // The local devirtualized target.
+    // This was enforced earlier during trySingleImplDevirt.
+    assert(VI.getSummaryList().size() == 1 &&
+           "Devirt of local target has more than one copy");
+    auto &S = VI.getSummaryList()[0];
+    const auto &ExportList = ExportLists.find(S->modulePath());
+    if (ExportList == ExportLists.end() ||
+        !ExportList->second.count(VI.getGUID()))
+      continue; // Not in its module's export list; recorded name stays.
+
+    // Exported by a cross-module import: record the promoted name per slot.
+    for (auto &SlotSummary : T.second) {
+      auto *TIdSum = Summary.getTypeIdSummary(SlotSummary.TypeID);
+      assert(TIdSum);
+      auto WPDRes = TIdSum->WPDRes.find(SlotSummary.ByteOffset);
+      assert(WPDRes != TIdSum->WPDRes.end());
+      WPDRes->second.SingleImplName = ModuleSummaryIndex::getGlobalNameForLocal(
+          WPDRes->second.SingleImplName,
+          Summary.getModuleHash(S->modulePath()));
+    }
+  }
+}
+
+} // end namespace llvm
+
 bool DevirtModule::runForTesting(
     Module &M, function_ref<AAResults &(Function &)> AARGetter,
     function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter,
@@ -766,6 +875,34 @@
   return !TargetsForSlot.empty();
 }
 
+bool DevirtIndex::tryFindVirtualCallTargets(
+    std::vector<ValueInfo> &TargetsForSlot, const TypeIdCompatibleVtableInfo TIdInfo,
+    uint64_t ByteOffset) { // Appends matching targets to TargetsForSlot.
+  for (const TypeIdOffsetVtableInfo P : TIdInfo) {
+    // VTable initializer should have only one summary, or all copies must be
+    // linkonce/weak ODR.
+    assert(P.VTableVI.getSummaryList().size() == 1 ||
+           llvm::all_of(
+               P.VTableVI.getSummaryList(),
+               [&](const std::unique_ptr<GlobalValueSummary> &Summary) {
+                 return GlobalValue::isLinkOnceODRLinkage(Summary->linkage()) ||
+                        GlobalValue::isWeakODRLinkage(Summary->linkage());
+               }));
+    const auto *VS = cast<GlobalVarSummary>(P.VTableVI.getSummaryList()[0].get());
+    if (!P.VTableVI.getSummaryList()[0]->isLive())
+      continue; // Vtables that are not live contribute no targets.
+    for (auto VTP : VS->vTableFuncs()) {
+      if (VTP.VTableOffset != P.AddressPointOffset + ByteOffset)
+        continue; // Not the requested slot of this vtable.
+
+      TargetsForSlot.push_back(VTP.FuncVI);
+    }
+  }
+
+  // Give up if we couldn't find any targets.
+  return !TargetsForSlot.empty();
+}
+
 void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
                                          Constant *TheFn, bool &IsExported) {
   auto Apply = [&](CallSiteInfo &CSInfo) {
@@ -837,6 +974,83 @@
   return true;
 }
 
+bool DevirtIndex::trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
+                                      VTableSlotSummary &SlotSummary,
+                                      VTableSlotInfo &SlotInfo,
+                                      WholeProgramDevirtResolution *Res,
+                                      std::set<ValueInfo> &DevirtTargets) {
+  // See if the program contains a single implementation of this virtual
+  // function.
+  auto TheFn = TargetsForSlot[0];
+  for (auto &&Target : TargetsForSlot)
+    if (TheFn != Target)
+      return false; // At least two distinct targets occupy this slot.
+
+  // Don't devirtualize if we don't have target definition.
+  auto Size = TheFn.getSummaryList().size();
+  if (!Size)
+    return false;
+
+  // If the summary list contains multiple summaries where at least one is
+  // a local, give up, as we won't know which (possibly promoted) name to use.
+  for (auto &S : TheFn.getSummaryList())
+    if (GlobalValue::isLocalLinkage(S->linkage()) && Size > 1)
+      return false;
+
+  // Collect functions devirtualized at least for one call site for stats.
+  if (PrintSummaryDevirt)
+    DevirtTargets.insert(TheFn);
+
+  auto &S = TheFn.getSummaryList()[0];
+  bool IsExported = false; // Set when a user lives in a different module.
+
+  // Insert calls into the summary index so that the devirtualized targets
+  // are eligible for import.
+  // FIXME: Annotate type tests with hotness. For now, mark these as hot
+  // to better ensure we have the opportunity to inline them.
+  CalleeInfo CI(CalleeInfo::HotnessType::Hot, /* RelBF = */ 0);
+  auto AddCalls = [&](CallSiteInfo &CSInfo) {
+    for (auto *FS : CSInfo.SummaryTypeCheckedLoadUsers) {
+      FS->addCall({TheFn, CI});
+      IsExported |= S->modulePath() != FS->modulePath();
+    }
+    for (auto *FS : CSInfo.SummaryTypeTestAssumeUsers) {
+      FS->addCall({TheFn, CI});
+      IsExported |= S->modulePath() != FS->modulePath();
+    }
+  };
+  AddCalls(SlotInfo.CSInfo);
+  for (auto &P : SlotInfo.ConstCSInfo)
+    AddCalls(P.second);
+
+  if (IsExported)
+    ExportedGUIDs.insert(TheFn.getGUID());
+
+  // Record in summary for use in devirtualization during the ThinLTO import
+  // step.
+  Res->TheKind = WholeProgramDevirtResolution::SingleImpl;
+  if (GlobalValue::isLocalLinkage(S->linkage())) {
+    if (IsExported)
+      // If target is a local function and we are exporting it by
+      // devirtualizing a call in another module, we need to record the
+      // promoted name.
+      Res->SingleImplName = ModuleSummaryIndex::getGlobalNameForLocal(
+          TheFn.name(), ExportSummary.getModuleHash(S->modulePath()));
+    else {
+      LocalWPDTargetsMap[TheFn].push_back(SlotSummary);
+      Res->SingleImplName = TheFn.name();
+    }
+  } else
+    Res->SingleImplName = TheFn.name();
+
+  // Name will be empty if this thin link is driven off of a serialized combined
+  // index (e.g. llvm-lto). However, WPD is not supported/invoked for the
+  // legacy LTO API anyway.
+  assert(!Res->SingleImplName.empty());
+
+  return true;
+}
+
 void DevirtModule::tryICallBranchFunnel(
     MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
     WholeProgramDevirtResolution *Res, VTableSlot Slot) {
@@ -1486,8 +1700,11 @@
 }
 
 void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
+  auto *TypeId = dyn_cast<MDString>(Slot.TypeID);
+  if (!TypeId)
+    return;
   const TypeIdSummary *TidSummary =
-      ImportSummary->getTypeIdSummary(cast<MDString>(Slot.TypeID)->getString());
+      ImportSummary->getTypeIdSummary(TypeId->getString());
   if (!TidSummary)
     return;
   auto ResI = TidSummary->WPDRes.find(Slot.ByteOffset);
@@ -1496,6 +1713,7 @@
   const WholeProgramDevirtResolution &Res = ResI->second;
 
   if (Res.TheKind == WholeProgramDevirtResolution::SingleImpl) {
+    assert(!Res.SingleImplName.empty());
     // The type of the function in the declaration is irrelevant because every
     // call site will cast it to the correct type.
     Constant *SingleImpl =
@@ -1713,7 +1931,7 @@
       using namespace ore;
       OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE, "Devirtualized", F)
                         << "devirtualized "
-                        << NV("FunctionName", F->getName()));
+                        << NV("FunctionName", DT.first));
     }
   }
 
@@ -1727,3 +1945,78 @@
 
   return true;
 }
+
+void DevirtIndex::run() {
+  if (ExportSummary.typeIdCompatibleVtableMap().empty())
+    return; // No type id to vtable entries in the index; nothing to do.
+
+  DenseMap<GlobalValue::GUID, std::vector<StringRef>> NameByGUID;
+  for (auto &P : ExportSummary.typeIdCompatibleVtableMap()) {
+    NameByGUID[GlobalValue::getGUID(P.first)].push_back(P.first);
+  } // NameByGUID now maps each type id GUID to the name(s) hashing to it.
+
+  // Collect information from summary about which calls to try to devirtualize.
+  for (auto &P : ExportSummary) {
+    for (auto &S : P.second.SummaryList) {
+      auto *FS = dyn_cast<FunctionSummary>(S.get());
+      if (!FS)
+        continue; // Only function summaries carry virtual call records.
+      // FIXME: Only add live functions.
+      for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
+        for (StringRef Name : NameByGUID[VF.GUID]) {
+          CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS);
+        }
+      }
+      for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
+        for (StringRef Name : NameByGUID[VF.GUID]) {
+          CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS);
+        }
+      }
+      for (const FunctionSummary::ConstVCall &VC :
+           FS->type_test_assume_const_vcalls()) {
+        for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
+          CallSlots[{Name, VC.VFunc.Offset}]
+              .ConstCSInfo[VC.Args]
+              .addSummaryTypeTestAssumeUser(FS);
+        }
+      }
+      for (const FunctionSummary::ConstVCall &VC :
+           FS->type_checked_load_const_vcalls()) {
+        for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
+          CallSlots[{Name, VC.VFunc.Offset}]
+              .ConstCSInfo[VC.Args]
+              .addSummaryTypeCheckedLoadUser(FS);
+        }
+      }
+    }
+  }
+
+  std::set<ValueInfo> DevirtTargets;
+  // For each (type, offset) pair:
+  for (auto &S : CallSlots) {
+    // Search each of the members of the type identifier for the virtual
+    // function implementation at offset S.first.ByteOffset, and add to
+    // TargetsForSlot.
+    std::vector<ValueInfo> TargetsForSlot;
+    auto TidSummary = ExportSummary.getTypeIdCompatibleVtableSummary(S.first.TypeID);
+    assert(TidSummary);
+    if (tryFindVirtualCallTargets(TargetsForSlot, *TidSummary,
+                                  S.first.ByteOffset)) {
+      WholeProgramDevirtResolution *Res =
+          &ExportSummary.getOrInsertTypeIdSummary(S.first.TypeID)
+               .WPDRes[S.first.ByteOffset];
+
+      if (!trySingleImplDevirt(TargetsForSlot, S.first, S.second, Res,
+                               DevirtTargets))
+        continue; // Single-impl is the only resolution attempted here.
+    }
+  }
+
+  // Optionally have the thin link print message for each devirtualized
+  // function.
+  if (PrintSummaryDevirt)
+    for (const auto &DT : DevirtTargets)
+      errs() << "Devirtualized call to " << DT << "\n";
+
+  return;
+}
Index: llvm/trunk/test/ThinLTO/X86/Inputs/devirt2.ll
===================================================================
--- llvm/trunk/test/ThinLTO/X86/Inputs/devirt2.ll
+++ llvm/trunk/test/ThinLTO/X86/Inputs/devirt2.ll
@@ -0,0 +1,59 @@
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+%struct.A = type { i32 (...)** }
+%struct.B = type { %struct.A }
+%struct.C = type { %struct.A }
+%struct.D = type { i32 (...)** }
+%struct.E = type { i32 (...)** }
+
+@_ZTV1B = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !1
+@_ZTV1C = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.C*, i32)* @_ZN1C1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !2
+@_ZTV1D = linkonce_odr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.D*, i32)* @_ZN1D1mEi to i8*)] }, !type !3
+@_ZTV1E = constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.E*, i32)* @_ZN1E1mEi to i8*)] }, !type !4
+
+define i32 @_ZN1B1fEi(%struct.B* %this, i32 %a) #0 {
+   ret i32 0;
+}
+
+define internal i32 @_ZN1A1nEi(%struct.A* %this, i32 %a) #0 { ; local target
+   ret i32 0; referenced by the last slot of both @_ZTV1B and @_ZTV1C
+}
+
+define i32 @_ZN1C1fEi(%struct.C* %this, i32 %a) #0 {
+   ret i32 0;
+}
+
+define linkonce_odr i32 @_ZN1D1mEi(%struct.D* %this, i32 %a) #0 {
+   ret i32 0;
+}
+
+define internal i32 @_ZN1E1mEi(%struct.E* %this, i32 %a) #0 { ; local target
+   ret i32 0; the only function referenced by @_ZTV1E
+}
+
+define i32 @test2(%struct.E* %obj, i32 %a) { ; virtual call through %obj
+entry:
+  %0 = bitcast %struct.E* %obj to i8***
+  %vtable2 = load i8**, i8*** %0 ; load the vtable pointer from the object
+  %1 = bitcast i8** %vtable2 to i8*
+  %p2 = call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1E")
+  call void @llvm.assume(i1 %p2) ; assume the type test on _ZTS1E holds
+
+  %2 = bitcast i8** %vtable2 to i32 (%struct.E*, i32)**
+  %fptr33 = load i32 (%struct.E*, i32)*, i32 (%struct.E*, i32)** %2, align 8
+
+  %call4 = tail call i32 %fptr33(%struct.E* nonnull %obj, i32 %a) ; indirect
+  ret i32 %call4
+}
+
+attributes #0 = { noinline optnone }
+
+declare i1 @llvm.type.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+!0 = !{i64 16, !"_ZTS1A"}
+!1 = !{i64 16, !"_ZTS1B"}
+!2 = !{i64 16, !"_ZTS1C"}
+!3 = !{i64 16, !"_ZTS1D"}
+!4 = !{i64 16, !"_ZTS1E"}
Index: llvm/trunk/test/ThinLTO/X86/devirt.ll
===================================================================
--- llvm/trunk/test/ThinLTO/X86/devirt.ll
+++ llvm/trunk/test/ThinLTO/X86/devirt.ll
@@ -20,10 +20,10 @@
 ; and that we generate summary information needed for index-based WPD.
 ; RUN: llvm-dis -o - %t2.o | FileCheck %s --check-prefix=NOENABLESPLITFLAG
 ; NOENABLESPLITFLAG-DAG: !{i32 1, !"EnableSplitLTOUnit", i32 0}
-; NOENABLESPLITFLAG-DAG: [[An:\^[0-9]+]] = gv: (name: "_ZN1A1nEi")
-; NOENABLESPLITFLAG-DAG: [[Bf:\^[0-9]+]] = gv: (name: "_ZN1B1fEi")
-; NOENABLESPLITFLAG-DAG: [[Cf:\^[0-9]+]] = gv: (name: "_ZN1C1fEi")
-; NOENABLESPLITFLAG-DAG: [[Dm:\^[0-9]+]] = gv: (name: "_ZN1D1mEi")
+; NOENABLESPLITFLAG-DAG: [[An:\^[0-9]+]] = gv: (name: "_ZN1A1nEi"
+; NOENABLESPLITFLAG-DAG: [[Bf:\^[0-9]+]] = gv: (name: "_ZN1B1fEi"
+; NOENABLESPLITFLAG-DAG: [[Cf:\^[0-9]+]] = gv: (name: "_ZN1C1fEi"
+; NOENABLESPLITFLAG-DAG: [[Dm:\^[0-9]+]] = gv: (name: "_ZN1D1mEi"
 ; NOENABLESPLITFLAG-DAG: [[B:\^[0-9]+]] = gv: (name: "_ZTV1B", {{.*}} vTableFuncs: ((virtFunc: [[Bf]], offset: 16), (virtFunc: [[An]], offset: 24)), refs: ([[Bf]], [[An]])
 ; NOENABLESPLITFLAG-DAG: [[C:\^[0-9]+]] = gv: (name: "_ZTV1C", {{.*}} vTableFuncs: ((virtFunc: [[Cf]], offset: 16), (virtFunc: [[An]], offset: 24)), refs: ([[An]], [[Cf]])
 ; NOENABLESPLITFLAG-DAG: [[D:\^[0-9]+]] = gv: (name: "_ZTV1D", {{.*}} vTableFuncs: ((virtFunc: [[Dm]], offset: 16)), refs: ([[Dm]])
@@ -33,7 +33,31 @@
 ; Type Id on _ZTV1D should have been promoted
 ; NOENABLESPLITFLAG-DAG: typeidCompatibleVTable: (name: "1${{.*}}", summary: ((offset: 16, [[D]])))
 
-; TODO: Test index-based WPD one %t2.o once implemented.
+; Legacy PM, Index based WPD
+; RUN: llvm-lto2 run %t2.o -save-temps -pass-remarks=. \
+; RUN:   -o %t3 \
+; RUN:   -r=%t2.o,test,px \
+; RUN:   -r=%t2.o,_ZN1A1nEi,p \
+; RUN:   -r=%t2.o,_ZN1B1fEi,p \
+; RUN:   -r=%t2.o,_ZN1C1fEi,p \
+; RUN:   -r=%t2.o,_ZN1D1mEi,p \
+; RUN:   -r=%t2.o,_ZTV1B,px \
+; RUN:   -r=%t2.o,_ZTV1C,px \
+; RUN:   -r=%t2.o,_ZTV1D,px 2>&1 | FileCheck %s --check-prefix=REMARK
+; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR
+
+; New PM, Index based WPD
+; RUN: llvm-lto2 run %t2.o -save-temps -use-new-pm -pass-remarks=. \
+; RUN:   -o %t3 \
+; RUN:   -r=%t2.o,test,px \
+; RUN:   -r=%t2.o,_ZN1A1nEi,p \
+; RUN:   -r=%t2.o,_ZN1B1fEi,p \
+; RUN:   -r=%t2.o,_ZN1C1fEi,p \
+; RUN:   -r=%t2.o,_ZN1D1mEi,p \
+; RUN:   -r=%t2.o,_ZTV1B,px \
+; RUN:   -r=%t2.o,_ZTV1C,px \
+; RUN:   -r=%t2.o,_ZTV1D,px 2>&1 | FileCheck %s --check-prefix=REMARK
+; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR
 
 ; Legacy PM
 ; FIXME: Fix machine verifier issues and remove -verify-machineinstrs=0. PR39436.
@@ -138,10 +162,24 @@
 declare i1 @llvm.type.test(i8*, metadata)
 declare void @llvm.assume(i1)
 
-declare i32 @_ZN1B1fEi(%struct.B* %this, i32 %a)
-declare i32 @_ZN1A1nEi(%struct.A* %this, i32 %a)
-declare i32 @_ZN1C1fEi(%struct.C* %this, i32 %a)
-declare i32 @_ZN1D1mEi(%struct.D* %this, i32 %a)
+define i32 @_ZN1B1fEi(%struct.B* %this, i32 %a) #0 {
+   ret i32 0;
+}
+
+define i32 @_ZN1A1nEi(%struct.A* %this, i32 %a) #0 {
+   ret i32 0;
+}
+
+define i32 @_ZN1C1fEi(%struct.C* %this, i32 %a) #0 {
+   ret i32 0;
+}
+
+define i32 @_ZN1D1mEi(%struct.D* %this, i32 %a) #0 {
+   ret i32 0;
+}
+
+; Make sure we don't inline or otherwise optimize out the direct calls.
+attributes #0 = { noinline optnone }
 
 !0 = !{i64 16, !"_ZTS1A"}
 !1 = !{i64 16, !"_ZTS1B"}
Index: llvm/trunk/test/ThinLTO/X86/devirt2.ll
===================================================================
--- llvm/trunk/test/ThinLTO/X86/devirt2.ll
+++ llvm/trunk/test/ThinLTO/X86/devirt2.ll
@@ -0,0 +1,278 @@
+; REQUIRES: x86-registered-target
+
+; Test devirtualization requiring promotion of local targets.
+
+; Generate split module with summary for hybrid Thin/Regular LTO WPD.
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t1.o %s
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t2.o %p/Inputs/devirt3.ll
+
+; Check that we have module flag showing splitting enabled, and that we don't
+; generate summary information needed for index-based WPD.
+; RUN: llvm-modextract -b -n=0 %t2.o -o %t2.o.0
+; RUN: llvm-dis -o - %t2.o.0 | FileCheck %s --check-prefix=ENABLESPLITFLAG --implicit-check-not=vTableFuncs --implicit-check-not=typeidCompatibleVTable
+; RUN: llvm-modextract -b -n=1 %t2.o -o %t2.o.1
+; RUN: llvm-dis -o - %t2.o.1 | FileCheck %s --check-prefix=ENABLESPLITFLAG --implicit-check-not=vTableFuncs --implicit-check-not=typeidCompatibleVTable
+; ENABLESPLITFLAG: !{i32 1, !"EnableSplitLTOUnit", i32 1}
+
+; Generate unsplit module with summary for ThinLTO index-based WPD.
+; RUN: opt -thinlto-bc -o %t3.o %s
+; RUN: opt -thinlto-bc -o %t4.o %p/Inputs/devirt3.ll
+
+; Check that the module flag shows splitting is not enabled for ThinLTO,
+; and that we generate summary information needed for index-based WPD.
+; RUN: llvm-dis -o - %t4.o | FileCheck %s --check-prefix=NOENABLESPLITFLAG
+; NOENABLESPLITFLAG-DAG: !{i32 1, !"EnableSplitLTOUnit", i32 0}
+; NOENABLESPLITFLAG-DAG: [[An:\^[0-9]+]] = gv: (name: "_ZN1A1nEi"
+; NOENABLESPLITFLAG-DAG: [[Bf:\^[0-9]+]] = gv: (name: "_ZN1B1fEi"
+; NOENABLESPLITFLAG-DAG: [[Cf:\^[0-9]+]] = gv: (name: "_ZN1C1fEi"
+; NOENABLESPLITFLAG-DAG: [[Dm:\^[0-9]+]] = gv: (name: "_ZN1D1mEi"
+; NOENABLESPLITFLAG-DAG: [[B:\^[0-9]+]] = gv: (name: "_ZTV1B", {{.*}} vTableFuncs: ((virtFunc: [[Bf]], offset: 16), (virtFunc: [[An]], offset: 24)), refs: ([[Bf]], [[An]])
+; NOENABLESPLITFLAG-DAG: [[C:\^[0-9]+]] = gv: (name: "_ZTV1C", {{.*}} vTableFuncs: ((virtFunc: [[Cf]], offset: 16), (virtFunc: [[An]], offset: 24)), refs: ([[An]], [[Cf]])
+; NOENABLESPLITFLAG-DAG: [[D:\^[0-9]+]] = gv: (name: "_ZTV1D", {{.*}} vTableFuncs: ((virtFunc: [[Dm]], offset: 16)), refs: ([[Dm]])
+; NOENABLESPLITFLAG-DAG: typeidCompatibleVTable: (name: "_ZTS1A", summary: ((offset: 16, [[B]]), (offset: 16, [[C]])))
+; NOENABLESPLITFLAG-DAG: typeidCompatibleVTable: (name: "_ZTS1B", summary: ((offset: 16, [[B]])))
+; NOENABLESPLITFLAG-DAG: typeidCompatibleVTable: (name: "_ZTS1C", summary: ((offset: 16, [[C]])))
+; NOENABLESPLITFLAG-DAG: typeidCompatibleVTable: (name: "_ZTS1D", summary: ((offset: 16, [[D]])))
+
+; Legacy PM, Index based WPD
+; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -pass-remarks=. \
+; RUN:   -wholeprogramdevirt-print-index-based \
+; RUN:   -o %t5 \
+; RUN:   -r=%t3.o,test,px \
+; RUN:   -r=%t3.o,_ZTV1B, \
+; RUN:   -r=%t3.o,_ZTV1C, \
+; RUN:   -r=%t3.o,_ZTV1D, \
+; RUN:   -r=%t3.o,_ZN1D1mEi, \
+; RUN:   -r=%t3.o,test2, \
+; RUN:   -r=%t4.o,_ZN1B1fEi,p \
+; RUN:   -r=%t4.o,_ZN1C1fEi,p \
+; RUN:   -r=%t4.o,_ZN1D1mEi,p \
+; RUN:   -r=%t4.o,test2,px \
+; RUN:   -r=%t4.o,_ZTV1B,px \
+; RUN:   -r=%t4.o,_ZTV1C,px \
+; RUN:   -r=%t4.o,_ZTV1D,px \
+; RUN:   -r=%t4.o,_ZTV1E,px 2>&1 | FileCheck %s --check-prefix=REMARK --check-prefix=PRINT
+; RUN: llvm-dis %t5.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR1
+; RUN: llvm-dis %t5.2.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR2
+; RUN: nm %t5.1 | FileCheck %s --check-prefix=NM-INDEX1
+; RUN: nm %t5.2 | FileCheck %s --check-prefix=NM-INDEX2
+
+; New PM, Index based WPD
+; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -use-new-pm -pass-remarks=. \
+; RUN:   -wholeprogramdevirt-print-index-based \
+; RUN:   -o %t5 \
+; RUN:   -r=%t3.o,test,px \
+; RUN:   -r=%t3.o,_ZTV1B, \
+; RUN:   -r=%t3.o,_ZTV1C, \
+; RUN:   -r=%t3.o,_ZTV1D, \
+; RUN:   -r=%t3.o,_ZN1D1mEi, \
+; RUN:   -r=%t3.o,test2, \
+; RUN:   -r=%t4.o,_ZN1B1fEi,p \
+; RUN:   -r=%t4.o,_ZN1C1fEi,p \
+; RUN:   -r=%t4.o,_ZN1D1mEi,p \
+; RUN:   -r=%t4.o,test2,px \
+; RUN:   -r=%t4.o,_ZTV1B,px \
+; RUN:   -r=%t4.o,_ZTV1C,px \
+; RUN:   -r=%t4.o,_ZTV1D,px \
+; RUN:   -r=%t4.o,_ZTV1E,px 2>&1 | FileCheck %s --check-prefix=REMARK --check-prefix=PRINT
+; RUN: llvm-dis %t5.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR1
+; RUN: llvm-dis %t5.2.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR2
+; RUN: nm %t5.1 | FileCheck %s --check-prefix=NM-INDEX1
+; RUN: nm %t5.2 | FileCheck %s --check-prefix=NM-INDEX2
+
+; NM-INDEX1-DAG: U _ZN1A1nEi.llvm.
+; NM-INDEX1-DAG: U _ZN1E1mEi.llvm.
+; NM-INDEX1-DAG: U _ZN1D1mEi
+
+; NM-INDEX2-DAG: T _ZN1A1nEi.llvm.
+; NM-INDEX2-DAG: T _ZN1E1mEi.llvm.
+; NM-INDEX2-DAG: W _ZN1D1mEi
+; NM-INDEX2-DAG: t _ZN1B1fEi
+; NM-INDEX2-DAG: t _ZN1C1fEi
+
+; Index based WPD, distributed backends
+; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -use-new-pm \
+; RUN:   -thinlto-distributed-indexes -wholeprogramdevirt-print-index-based \
+; RUN:   -o %t5 \
+; RUN:   -r=%t3.o,test,px \
+; RUN:   -r=%t3.o,_ZTV1B, \
+; RUN:   -r=%t3.o,_ZTV1C, \
+; RUN:   -r=%t3.o,_ZTV1D, \
+; RUN:   -r=%t3.o,_ZN1D1mEi, \
+; RUN:   -r=%t3.o,test2, \
+; RUN:   -r=%t4.o,_ZN1B1fEi,p \
+; RUN:   -r=%t4.o,_ZN1C1fEi,p \
+; RUN:   -r=%t4.o,_ZN1D1mEi,p \
+; RUN:   -r=%t4.o,test2,px \
+; RUN:   -r=%t4.o,_ZTV1B,px \
+; RUN:   -r=%t4.o,_ZTV1C,px \
+; RUN:   -r=%t4.o,_ZTV1D,px \
+; RUN:   -r=%t4.o,_ZTV1E,px 2>&1 | FileCheck %s --check-prefix=PRINT
+
+; PRINT-DAG: Devirtualized call to {{.*}} (_ZN1A1nEi)
+; PRINT-DAG: Devirtualized call to {{.*}} (_ZN1E1mEi)
+; PRINT-DAG: Devirtualized call to {{.*}} (_ZN1D1mEi)
+
+; Legacy PM
+; RUN: llvm-lto2 run %t1.o %t2.o -save-temps -pass-remarks=. \
+; RUN:   -o %t5 \
+; RUN:   -r=%t1.o,test,px \
+; RUN:   -r=%t1.o,_ZTV1B, \
+; RUN:   -r=%t1.o,_ZTV1C, \
+; RUN:   -r=%t1.o,_ZTV1D, \
+; RUN:   -r=%t1.o,_ZTV1D, \
+; RUN:   -r=%t1.o,_ZN1D1mEi, \
+; RUN:   -r=%t1.o,_ZN1D1mEi, \
+; RUN:   -r=%t1.o,test2, \
+; RUN:   -r=%t2.o,_ZN1A1nEi,p \
+; RUN:   -r=%t2.o,_ZN1B1fEi,p \
+; RUN:   -r=%t2.o,_ZN1C1fEi,p \
+; RUN:   -r=%t2.o,_ZN1D1mEi,p \
+; RUN:   -r=%t2.o,_ZN1E1mEi,p \
+; RUN:   -r=%t2.o,_ZTV1B, \
+; RUN:   -r=%t2.o,_ZTV1C, \
+; RUN:   -r=%t2.o,_ZTV1D, \
+; RUN:   -r=%t2.o,_ZTV1E, \
+; RUN:   -r=%t2.o,test2,px \
+; RUN:   -r=%t2.o,_ZN1A1nEi, \
+; RUN:   -r=%t2.o,_ZN1B1fEi, \
+; RUN:   -r=%t2.o,_ZN1C1fEi, \
+; RUN:   -r=%t2.o,_ZN1D1mEi, \
+; RUN:   -r=%t2.o,_ZN1E1mEi, \
+; RUN:   -r=%t2.o,_ZTV1B,px \
+; RUN:   -r=%t2.o,_ZTV1C,px \
+; RUN:   -r=%t2.o,_ZTV1D,px \
+; RUN:   -r=%t2.o,_ZTV1E,px 2>&1 | FileCheck %s --check-prefix=REMARK
+; RUN: llvm-dis %t5.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR1
+; RUN: llvm-dis %t5.2.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR2
+; RUN: nm %t5.1 | FileCheck %s --check-prefix=NM-HYBRID1
+; RUN: nm %t5.2 | FileCheck %s --check-prefix=NM-HYBRID2
+
+; New PM
+; RUN: llvm-lto2 run %t1.o %t2.o -save-temps -use-new-pm -pass-remarks=. \
+; RUN:   -o %t5 \
+; RUN:   -r=%t1.o,test,px \
+; RUN:   -r=%t1.o,_ZTV1B, \
+; RUN:   -r=%t1.o,_ZTV1C, \
+; RUN:   -r=%t1.o,_ZTV1D, \
+; RUN:   -r=%t1.o,_ZTV1D, \
+; RUN:   -r=%t1.o,_ZN1D1mEi, \
+; RUN:   -r=%t1.o,_ZN1D1mEi, \
+; RUN:   -r=%t1.o,test2, \
+; RUN:   -r=%t2.o,_ZN1A1nEi,p \
+; RUN:   -r=%t2.o,_ZN1B1fEi,p \
+; RUN:   -r=%t2.o,_ZN1C1fEi,p \
+; RUN:   -r=%t2.o,_ZN1D1mEi,p \
+; RUN:   -r=%t2.o,_ZN1E1mEi,p \
+; RUN:   -r=%t2.o,_ZTV1B, \
+; RUN:   -r=%t2.o,_ZTV1C, \
+; RUN:   -r=%t2.o,_ZTV1D, \
+; RUN:   -r=%t2.o,_ZTV1E, \
+; RUN:   -r=%t2.o,test2,px \
+; RUN:   -r=%t2.o,_ZN1A1nEi, \
+; RUN:   -r=%t2.o,_ZN1B1fEi, \
+; RUN:   -r=%t2.o,_ZN1C1fEi, \
+; RUN:   -r=%t2.o,_ZN1D1mEi, \
+; RUN:   -r=%t2.o,_ZN1E1mEi, \
+; RUN:   -r=%t2.o,_ZTV1B,px \
+; RUN:   -r=%t2.o,_ZTV1C,px \
+; RUN:   -r=%t2.o,_ZTV1D,px \
+; RUN:   -r=%t2.o,_ZTV1E,px 2>&1 | FileCheck %s --check-prefix=REMARK
+; RUN: llvm-dis %t5.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR1
+; RUN: llvm-dis %t5.2.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR2
+; RUN: nm %t5.1 | FileCheck %s --check-prefix=NM-HYBRID1
+; RUN: nm %t5.2 | FileCheck %s --check-prefix=NM-HYBRID2
+
+; NM-HYBRID1-DAG: U _ZN1A1nEi$
+; NM-HYBRID1-DAG: U _ZN1E1mEi$
+; NM-HYBRID1-DAG: U _ZN1D1mEi
+
+; NM-HYBRID2-DAG: T _ZN1A1nEi$
+; NM-HYBRID2-DAG: T _ZN1E1mEi$
+; NM-HYBRID2-DAG: W _ZN1D1mEi
+; NM-HYBRID2-DAG: T _ZN1B1fEi
+; NM-HYBRID2-DAG: T _ZN1C1fEi
+
+; REMARK-DAG: single-impl: devirtualized a call to _ZN1A1nEi
+; REMARK-DAG: single-impl: devirtualized a call to _ZN1D1mEi
+; We should devirt call to _ZN1E1mEi once in importing module and once
+; in original (exporting) module.
+; REMARK-DAG: single-impl: devirtualized a call to _ZN1E1mEi
+; REMARK-DAG: single-impl: devirtualized a call to _ZN1E1mEi
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+%struct.A = type { i32 (...)** }
+%struct.B = type { %struct.A }
+%struct.C = type { %struct.A }
+%struct.D = type { i32 (...)** }
+%struct.E = type { i32 (...)** }
+
+@_ZTV1B = external constant [4 x i8*]
+@_ZTV1C = external constant [4 x i8*]
+;@_ZTV1D = external constant [3 x i8*]
+@_ZTV1D = linkonce_odr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.D*, i32)* @_ZN1D1mEi to i8*)] }, !type !3
+
+define linkonce_odr i32 @_ZN1D1mEi(%struct.D* %this, i32 %a) #0 {
+   ret i32 0
+}
+
+; CHECK-IR1-LABEL: define i32 @test
+define i32 @test(%struct.A* %obj, %struct.D* %obj2, %struct.E* %obj3, i32 %a) {
+entry:
+  %0 = bitcast %struct.A* %obj to i8***
+  %vtable = load i8**, i8*** %0
+  %1 = bitcast i8** %vtable to i8*
+  %p = call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1A")
+  call void @llvm.assume(i1 %p)
+  %fptrptr = getelementptr i8*, i8** %vtable, i32 1
+  %2 = bitcast i8** %fptrptr to i32 (%struct.A*, i32)**
+  %fptr1 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %2, align 8
+
+  ; Check that the call was devirtualized. Ignore extra character before
+  ; symbol name which would happen if it was promoted during module
+  ; splitting for hybrid WPD.
+  ; CHECK-IR1: %call = tail call i32 bitcast (void ()* @{{.*}}_ZN1A1nEi
+  %call = tail call i32 %fptr1(%struct.A* nonnull %obj, i32 %a)
+
+  %3 = bitcast i8** %vtable to i32 (%struct.A*, i32)**
+  %fptr22 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %3, align 8
+
+  ; We still have to call it as virtual.
+  ; CHECK-IR1: %call3 = tail call i32 %fptr22
+  %call3 = tail call i32 %fptr22(%struct.A* nonnull %obj, i32 %call)
+
+  %4 = bitcast %struct.D* %obj2 to i8***
+  %vtable2 = load i8**, i8*** %4
+  %5 = bitcast i8** %vtable2 to i8*
+  %p2 = call i1 @llvm.type.test(i8* %5, metadata !"_ZTS1D")
+  call void @llvm.assume(i1 %p2)
+
+  %6 = bitcast i8** %vtable2 to i32 (%struct.D*, i32)**
+  %fptr33 = load i32 (%struct.D*, i32)*, i32 (%struct.D*, i32)** %6, align 8
+
+  ; Check that the call was devirtualized.
+  ; CHECK-IR1: %call4 = tail call i32 @_ZN1D1mEi
+  %call4 = tail call i32 %fptr33(%struct.D* nonnull %obj2, i32 %call3)
+
+  %call5 = tail call i32 @test2(%struct.E* nonnull %obj3, i32 %call4)
+  ret i32 %call5
+}
+; CHECK-IR1-LABEL: ret i32
+; CHECK-IR1-LABEL: }
+
+; CHECK-IR2: define i32 @test2
+; CHECK-IR2-NEXT: entry:
+; Check that the call was devirtualized. Ignore extra character before
+; symbol name which would happen if it was promoted during module
+; splitting for hybrid WPD.
+; CHECK-IR2-NEXT:   %call4 = tail call i32 @{{.*}}_ZN1E1mEi
+
+declare i1 @llvm.type.test(i8*, metadata)
+declare void @llvm.assume(i1)
+declare i32 @test2(%struct.E* %obj, i32 %a)
+
+attributes #0 = { noinline optnone }
+
+!3 = !{i64 16, !"_ZTS1D"}
Index: llvm/trunk/test/ThinLTO/X86/nodevirt-nonpromoted-typeid.ll
===================================================================
--- llvm/trunk/test/ThinLTO/X86/nodevirt-nonpromoted-typeid.ll
+++ llvm/trunk/test/ThinLTO/X86/nodevirt-nonpromoted-typeid.ll
@@ -0,0 +1,66 @@
+; REQUIRES: x86-registered-target
+
+; Test that index-only devirtualization handles and ignores any
+; type metadata that could not be summarized (because it was internal
+; and could not be promoted due to the fact that the module has
+; no external symbols and therefore could not be assigned a unique
+; identifier). In this case we should simply not get the type
+; metadata summary entries, and no promotion will occur.
+
+; Generate unsplit module with summary for ThinLTO index-based WPD.
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit=false -o %t2.o %s
+
+; Check that no type metadata summary entries (typeIdInfo, typeidMetadata)
+; are generated, since the internal type id could not be promoted.
+; RUN: llvm-dis -o - %t2.o | FileCheck %s --check-prefix=DIS
+; DIS-NOT: typeIdInfo
+; DIS-NOT: typeidMetadata
+
+; Legacy PM, Index based WPD
+; RUN: llvm-lto2 run %t2.o -save-temps -pass-remarks=. \
+; RUN:   -o %t3 \
+; RUN:   -r=%t2.o,test,plx \
+; RUN:   -r=%t2.o,_ZN1D1mEi,
+; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR
+
+; New PM, Index based WPD
+; RUN: llvm-lto2 run %t2.o -save-temps -use-new-pm -pass-remarks=. \
+; RUN:   -o %t3 \
+; RUN:   -r=%t2.o,test,plx \
+; RUN:   -r=%t2.o,_ZN1D1mEi,
+; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+%struct.D = type { i32 (...)** }
+
+@_ZTV1D = internal constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.D*, i32)* @_ZN1D1mEi to i8*)] }, !type !3
+
+; CHECK-IR-LABEL: define weak_odr dso_local i32 @test
+define weak_odr i32 @test(%struct.D* %obj2, i32 %a) {
+entry:
+  %0 = bitcast %struct.D* %obj2 to i8***
+  %vtable2 = load i8**, i8*** %0
+  %1 = bitcast i8** %vtable2 to i8*
+  %p2 = call i1 @llvm.type.test(i8* %1, metadata !4)
+  call void @llvm.assume(i1 %p2)
+
+  %2 = bitcast i8** %vtable2 to i32 (%struct.D*, i32)**
+  %fptr33 = load i32 (%struct.D*, i32)*, i32 (%struct.D*, i32)** %2, align 8
+
+  ; Check that the call was not devirtualized.
+  ; CHECK-IR: %call4 = tail call i32 %fptr33
+  %call4 = tail call i32 %fptr33(%struct.D* nonnull %obj2, i32 0)
+  ret i32 %call4
+}
+; CHECK-IR-LABEL: ret i32
+; CHECK-IR-LABEL: }
+
+declare i1 @llvm.type.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+declare i32 @_ZN1D1mEi(%struct.D* %this, i32 %a)
+
+!3 = !{i64 16, !4}
+!4 = distinct !{}
Index: llvm/trunk/tools/llvm-lto2/llvm-lto2.cpp
===================================================================
--- llvm/trunk/tools/llvm-lto2/llvm-lto2.cpp
+++ llvm/trunk/tools/llvm-lto2/llvm-lto2.cpp
@@ -291,6 +291,14 @@
     std::vector<SymbolResolution> Res;
     for (const InputFile::Symbol &Sym : Input->symbols()) {
       auto I = CommandLineResolutions.find({F, Sym.getName()});
+      // If it isn't found, look for "$", which would have been added
+      // (followed by a hash) when the symbol was promoted during module
+      // splitting if it was defined in one part and used in the other.
+      // Try looking up the symbol name before the "$".
+      if (I == CommandLineResolutions.end()) {
+        auto SplitName = Sym.getName().rsplit("$");
+        I = CommandLineResolutions.find({F, SplitName.first});
+      }
       if (I == CommandLineResolutions.end()) {
         llvm::errs() << argv[0] << ": missing symbol resolution for " << F
                      << ',' << Sym.getName() << '\n';