Index: lib/LTO/ThinLTOCodeGenerator.cpp =================================================================== --- lib/LTO/ThinLTOCodeGenerator.cpp +++ lib/LTO/ThinLTOCodeGenerator.cpp @@ -53,6 +53,7 @@ #include "llvm/Transforms/IPO/FunctionImport.h" #include "llvm/Transforms/IPO/Internalize.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/IPO/WholeProgramDevirt.h" #include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Utils/FunctionImportUtils.h" @@ -225,7 +226,8 @@ } static void optimizeModule(Module &TheModule, TargetMachine &TM, - unsigned OptLevel, bool Freestanding) { + unsigned OptLevel, bool Freestanding, + ModuleSummaryIndex *Index) { // Populate the PassManager PassManagerBuilder PMB; PMB.LibraryInfo = new TargetLibraryInfoImpl(TM.getTargetTriple()); @@ -239,6 +241,7 @@ // Already did this in verifyLoadedModule(). PMB.VerifyInput = false; PMB.VerifyOutput = false; + PMB.ImportSummary = Index; legacy::PassManager PM; @@ -433,7 +436,7 @@ saveTempBitcode(TheModule, SaveTempsDir, count, ".3.imported.bc"); } - optimizeModule(TheModule, TM, OptLevel, Freestanding); + optimizeModule(TheModule, TM, OptLevel, Freestanding, &Index); saveTempBitcode(TheModule, SaveTempsDir, count, ".4.opt.bc"); @@ -577,29 +580,36 @@ return CombinedIndex; } -static void internalizeAndPromoteInIndex( - const StringMap &ExportLists, - const DenseSet &GUIDPreservedSymbols, - const DenseMap - &PrevailingCopy, - ModuleSummaryIndex &Index) { - auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) { +struct IsExported { + const StringMap &ExportLists; + const DenseSet &GUIDPreservedSymbols; + + IsExported(const StringMap &ExportLists, + const DenseSet &GUIDPreservedSymbols) + : ExportLists(ExportLists), GUIDPreservedSymbols(GUIDPreservedSymbols) {} + + bool operator()(StringRef ModuleIdentifier, GlobalValue::GUID GUID) const { const auto &ExportList = ExportLists.find(ModuleIdentifier); return (ExportList != ExportLists.end() && ExportList->second.count(GUID)) || GUIDPreservedSymbols.count(GUID); - }; + } +}; - auto isPrevailing = [&](GlobalValue::GUID GUID, const GlobalValueSummary *S) { +struct IsPrevailing { + const DenseMap &PrevailingCopy; + IsPrevailing(const DenseMap + &PrevailingCopy) + : PrevailingCopy(PrevailingCopy) {} + + bool operator()(GlobalValue::GUID GUID, const GlobalValueSummary *S) const { const auto &Prevailing = PrevailingCopy.find(GUID); // Not in map means that there was only one copy, which must be prevailing. if (Prevailing == PrevailingCopy.end()) return true; return Prevailing->second == S; }; - - thinLTOInternalizeAndPromoteInIndex(Index, isExported, isPrevailing); -} +}; static void computeDeadSymbolsInIndex( ModuleSummaryIndex &Index, @@ -656,8 +666,9 @@ // Promote the exported values in the index, so that they are promoted // in the module. - internalizeAndPromoteInIndex(ExportLists, GUIDPreservedSymbols, - PrevailingCopy, Index); + thinLTOInternalizeAndPromoteInIndex( + Index, IsExported(ExportLists, GUIDPreservedSymbols), + IsPrevailing(PrevailingCopy)); promoteModule(TheModule, Index); } @@ -814,8 +825,9 @@ // Promote the exported values in the index, so that they are promoted // in the module. - internalizeAndPromoteInIndex(ExportLists, GUIDPreservedSymbols, - PrevailingCopy, Index); + thinLTOInternalizeAndPromoteInIndex( + Index, IsExported(ExportLists, GUIDPreservedSymbols), + IsPrevailing(PrevailingCopy)); promoteModule(TheModule, Index); @@ -834,7 +846,8 @@ initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple())); // Optimize now - optimizeModule(TheModule, *TMBuilder.create(), OptLevel, Freestanding); + optimizeModule(TheModule, *TMBuilder.create(), OptLevel, Freestanding, + nullptr); } /// Write out the generated object file, either from CacheEntryPath or from @@ -955,6 +968,15 @@ // Synthesize entry counts for functions in the combined index. computeSyntheticCounts(*Index); + // Perform index-based WPD. This will return immediately if there are + // no index entries in the typeIdMetadata map (e.g. if we are instead + // performing IR-based WPD in hybrid regular/thin LTO mode). + std::map> LocalWPDTargetsMap; + std::set ExportedGUIDs; + runWholeProgramDevirtOnIndex(*Index, ExportedGUIDs, LocalWPDTargetsMap); + for (auto GUID : ExportedGUIDs) + GUIDPreservedSymbols.insert(GUID); + // Collect the import/export lists for all modules from the call-graph in the // combined index. StringMap ImportLists(ModuleCount); @@ -979,8 +1001,12 @@ // Use global summary-based analysis to identify symbols that can be // internalized (because they aren't exported or preserved as per callback). // Changes are made in the index, consumed in the ThinLTO backends. - internalizeAndPromoteInIndex(ExportLists, GUIDPreservedSymbols, - PrevailingCopy, *Index); + updateIndexWPDForExports(*Index, + IsExported(ExportLists, GUIDPreservedSymbols), + LocalWPDTargetsMap); + thinLTOInternalizeAndPromoteInIndex( + *Index, IsExported(ExportLists, GUIDPreservedSymbols), + IsPrevailing(PrevailingCopy)); // Make sure that every module has an entry in the ExportLists, ImportList, // GVSummary and ResolvedODR maps to enable threaded access to these maps Index: test/ThinLTO/X86/devirt_promote_legacy.ll =================================================================== --- test/ThinLTO/X86/devirt_promote_legacy.ll +++ test/ThinLTO/X86/devirt_promote_legacy.ll @@ -0,0 +1,57 @@ +; REQUIRES: x86-registered-target + +; Test devirtualization requiring promotion of local targets, where the +; promotion is required by one devirtualization and needs to be updated +; for a second devirtualization in the defining module as a post-pass +; update. + +; Generate unsplit module with summary for ThinLTO index-based WPD. +; RUN: opt -thinlto-bc -o %t3.o %s +; RUN: opt -thinlto-bc -o %t4.o %p/Inputs/devirt_promote.ll + +; RUN: llvm-lto -thinlto-action=run %t3.o %t4.o --thinlto-save-temps=%t5. \ +; RUN: --pass-remarks=. \ +; RUN: --exported-symbol=test \ +; RUN: --exported-symbol=test2 \ +; RUN: --exported-symbol=_ZTV1B 2>&1 | FileCheck %s --check-prefix=REMARK +; RUN: llvm-dis %t5.0.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR1 +; RUN: llvm-dis %t5.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR2 + +; We should devirt call to _ZN1A1nEi once in importing module and once +; in original (exporting) module. +; REMARK-COUNT-2: single-impl: devirtualized a call to _ZN1A1nEi.llvm. + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +%struct.A = type { i32 (...)** } + +; CHECK-IR1-LABEL: define i32 @test +define i32 @test(%struct.A* %obj, i32 %a) { +entry: + %0 = bitcast %struct.A* %obj to i8*** + %vtable = load i8**, i8*** %0 + %1 = bitcast i8** %vtable to i8* + %p = call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1A") + call void @llvm.assume(i1 %p) + %fptrptr = getelementptr i8*, i8** %vtable, i32 1 + %2 = bitcast i8** %fptrptr to i32 (%struct.A*, i32)** + %fptr1 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %2, align 8 + + ; Check that the call was devirtualized. + ; CHECK-IR1: %call = tail call i32 bitcast (void ()* @_ZN1A1nEi + %call = tail call i32 %fptr1(%struct.A* nonnull %obj, i32 %a) + + ret i32 %call +} +; CHECK-IR1-LABEL: ret i32 +; CHECK-IR1-LABEL: } + +; CHECK-IR2: define i32 @test2 +; Check that the call was devirtualized. +; CHECK-IR2: %call4 = tail call i32 @_ZN1A1nEi + +declare i1 @llvm.type.test(i8*, metadata) +declare void @llvm.assume(i1) + +attributes #0 = { noinline optnone } Index: tools/clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- tools/clang/lib/Driver/ToolChains/Clang.cpp +++ tools/clang/lib/Driver/ToolChains/Clang.cpp @@ -3738,13 +3738,7 @@ if (D.isUsingLTO() && !isDeviceOffloadAction) { Args.AddLastArg(CmdArgs, options::OPT_flto, options::OPT_flto_EQ); - - // The Darwin and PS4 linkers currently use the legacy LTO API, which - // does not support LTO unit features (CFI, whole program vtable opt) - // under ThinLTO. - if (!(RawTriple.isOSDarwin() || RawTriple.isPS4()) || - D.getLTOMode() == LTOK_Full) - CmdArgs.push_back("-flto-unit"); + CmdArgs.push_back("-flto-unit"); } } Index: tools/clang/test/Driver/lto-unit.c =================================================================== --- tools/clang/test/Driver/lto-unit.c +++ tools/clang/test/Driver/lto-unit.c @@ -1,9 +1,8 @@ // RUN: %clang -target x86_64-unknown-linux -### %s -flto=full 2>&1 | FileCheck --check-prefix=UNIT %s // RUN: %clang -target x86_64-unknown-linux -### %s -flto=thin 2>&1 | FileCheck --check-prefix=UNIT %s // RUN: %clang -target x86_64-apple-darwin13.3.0 -### %s -flto=full 2>&1 | FileCheck --check-prefix=UNIT %s -// RUN: %clang -target x86_64-apple-darwin13.3.0 -### %s -flto=thin 2>&1 | FileCheck --check-prefix=NOUNIT %s +// RUN: %clang -target x86_64-apple-darwin13.3.0 -### %s -flto=thin 2>&1 | FileCheck --check-prefix=UNIT %s // RUN: %clang -target x86_64-scei-ps4 -### %s -flto=full 2>&1 | FileCheck --check-prefix=UNIT %s -// RUN: %clang -target x86_64-scei-ps4 -### %s -flto=thin 2>&1 | FileCheck --check-prefix=NOUNIT %s +// RUN: %clang -target x86_64-scei-ps4 -### %s -flto=thin 2>&1 | FileCheck --check-prefix=UNIT %s // UNIT: "-flto-unit" -// NOUNIT-NOT: "-flto-unit"