diff --git a/llvm/include/llvm/Transforms/IPO/Internalize.h b/llvm/include/llvm/Transforms/IPO/Internalize.h --- a/llvm/include/llvm/Transforms/IPO/Internalize.h +++ b/llvm/include/llvm/Transforms/IPO/Internalize.h @@ -21,7 +21,7 @@ #ifndef LLVM_TRANSFORMS_IPO_INTERNALIZE_H #define LLVM_TRANSFORMS_IPO_INTERNALIZE_H -#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringSet.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/PassManager.h" @@ -34,6 +34,21 @@ /// A pass that internalizes all functions and variables other than those that /// must be preserved according to \c MustPreserveGV. class InternalizePass : public PassInfoMixin { + struct ComdatInfo { + // The number of members. A comdat with one member which is not externally + // visible can be freely dropped. + size_t Size = 0; + // Whether the comdat has an externally visible member. + bool External = false; + // ELF only. Used to rename the comdat. + Comdat *Dest = nullptr; + }; + + // For ELF, we need to rename comdat to a uniquified name. We derive the new + // comdat name from ModuleId. + std::string ModuleId; + bool IsELF = false; + /// Client supplied callback to control wheter a symbol must be preserved. const std::function MustPreserveGV; /// Set of symbols private to the compiler that this pass should not touch. @@ -44,11 +59,11 @@ /// Internalize GV if it is possible to do so, i.e. it is not externally /// visible and is not a member of an externally visible comdat. bool maybeInternalize(GlobalValue &GV, - const DenseSet &ExternalComdats); + DenseMap &ComdatMap); /// If GV is part of a comdat and is externally visible, keep track of its /// comdat so that we don't internalize any of its members. - void checkComdatVisibility(GlobalValue &GV, - DenseSet &ExternalComdats); + void checkComdat(GlobalValue &GV, + DenseMap &ComdatMap); public: InternalizePass(); diff --git a/llvm/lib/Transforms/IPO/Internalize.cpp b/llvm/lib/Transforms/IPO/Internalize.cpp --- a/llvm/lib/Transforms/IPO/Internalize.cpp +++ b/llvm/lib/Transforms/IPO/Internalize.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSet.h" +#include "llvm/ADT/Triple.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" @@ -33,6 +34,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Utils/GlobalStatus.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" using namespace llvm; #define DEBUG_TYPE "internalize" @@ -111,14 +113,33 @@ } bool InternalizePass::maybeInternalize( - GlobalValue &GV, const DenseSet &ExternalComdats) { + GlobalValue &GV, DenseMap &ComdatMap) { + SmallString<0> ComdatName; if (Comdat *C = GV.getComdat()) { - if (ExternalComdats.count(C)) + // For GlobalAlias, C is the aliasee object's comdat which may have been + // redirected. So ComdatMap may not contain C. + if (ComdatMap.lookup(C).External) return false; - // If a comdat is not externally visible we can drop it. - if (auto GO = dyn_cast(&GV)) - GO->setComdat(nullptr); + if (auto *GO = dyn_cast(&GV)) { + // If a comdat with one member is not externally visible, we can drop it. + // Otherwise, the comdat can be used to establish dependencies among the + // group of sections. Thus we have to keep the comdat. + // + // On ELF, GNU ld and gold use the signature name as the comdat + // deduplication key. Rename the comdat to suppress deduplication with + // other object files. On COFF, non-external selection symbol suppresses + // deduplication and thus does not need renaming. + ComdatInfo &Info = ComdatMap.find(C)->second; + if (Info.Size == 1) { + GO->setComdat(nullptr); + } else if (IsELF) { + if (Info.Dest == nullptr) + Info.Dest = GV.getParent()->getOrInsertComdat( + (C->getName() + ModuleId).toStringRef(ComdatName)); + GO->setComdat(Info.Dest); + } + } if (GV.hasLocalLinkage()) return false; @@ -135,16 +156,18 @@ return true; } -// If GV is part of a comdat and is externally visible, keep track of its -// comdat so that we don't internalize any of its members. -void InternalizePass::checkComdatVisibility( - GlobalValue &GV, DenseSet &ExternalComdats) { +// If GV is part of a comdat and is externally visible, update the comdat size +// and keep track of its comdat so that we don't internalize any of its members. +void InternalizePass::checkComdat( + GlobalValue &GV, DenseMap &ComdatMap) { Comdat *C = GV.getComdat(); if (!C) return; + ComdatInfo &Info = ComdatMap.try_emplace(C).first->second; + ++Info.Size; if (shouldPreserveGV(GV)) - ExternalComdats.insert(C); + Info.External = true; } bool InternalizePass::internalizeModule(Module &M, CallGraph *CG) { @@ -154,15 +177,15 @@ SmallVector Used; collectUsedGlobalVariables(M, Used, false); - // Collect comdat visiblity information for the module. - DenseSet ExternalComdats; + // Collect comdat size and visiblity information for the module. + DenseMap ComdatMap; if (!M.getComdatSymbolTable().empty()) { for (Function &F : M) - checkComdatVisibility(F, ExternalComdats); + checkComdat(F, ComdatMap); for (GlobalVariable &GV : M.globals()) - checkComdatVisibility(GV, ExternalComdats); + checkComdat(GV, ComdatMap); for (GlobalAlias &GA : M.aliases()) - checkComdatVisibility(GA, ExternalComdats); + checkComdat(GA, ComdatMap); } // We must assume that globals in llvm.used have a reference that not even @@ -179,8 +202,10 @@ } // Mark all functions not in the api as internal. + ModuleId = getUniqueModuleId(&M); + IsELF = Triple(M.getTargetTriple()).isOSBinFormatELF(); for (Function &I : M) { - if (!maybeInternalize(I, ExternalComdats)) + if (!maybeInternalize(I, ComdatMap)) continue; Changed = true; @@ -213,7 +238,7 @@ // Mark all global variables with initializers that are not in the api as // internal as well. for (auto &GV : M.globals()) { - if (!maybeInternalize(GV, ExternalComdats)) + if (!maybeInternalize(GV, ComdatMap)) continue; Changed = true; @@ -223,7 +248,7 @@ // Mark all aliases that are not in the api as internal as well. for (auto &GA : M.aliases()) { - if (!maybeInternalize(GA, ExternalComdats)) + if (!maybeInternalize(GA, ComdatMap)) continue; Changed = true; diff --git a/llvm/test/Transforms/Internalize/comdat-empty-moduleid.ll b/llvm/test/Transforms/Internalize/comdat-empty-moduleid.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Internalize/comdat-empty-moduleid.ll @@ -0,0 +1,17 @@ +; RUN: opt < %s -mtriple=x86_64 -internalize -S | FileCheck %s + +$c2 = comdat any + +;; Without an exported symbol, the module ID is empty, and we don't rename +;; comdat for ELF. This does not matter in practice because all the symbols +;; will be optimized out. + +; CHECK: define internal void @c2_a() comdat($c2) { +define void @c2_a() comdat($c2) { + ret void +} + +; CHECK: define internal void @c2_b() comdat($c2) { +define void @c2_b() comdat($c2) { + ret void +} diff --git a/llvm/test/Transforms/Internalize/comdat.ll b/llvm/test/Transforms/Internalize/comdat.ll --- a/llvm/test/Transforms/Internalize/comdat.ll +++ b/llvm/test/Transforms/Internalize/comdat.ll @@ -1,14 +1,23 @@ -; RUN: opt < %s -internalize -internalize-public-api-list c1 -internalize-public-api-list c2 -internalize-public-api-list c3 -internalize-public-api-list c4 -S | FileCheck %s +; RUN: opt < %s -mtriple=x86_64 -internalize -internalize-public-api-list main -internalize-public-api-list c1 -internalize-public-api-list c2 \ +; RUN: -internalize-public-api-list c3 -internalize-public-api-list c4 -S | FileCheck %s --check-prefixes=CHECK,ELF +; RUN: opt < %s -mtriple=x86_64-windows -internalize -internalize-public-api-list main -internalize-public-api-list c1 -internalize-public-api-list c2 \ +; RUN: -internalize-public-api-list c3 -internalize-public-api-list c4 -S | FileCheck %s --check-prefixes=CHECK,COFF $c1 = comdat any $c2 = comdat any $c3 = comdat any $c4 = comdat any +$c5 = comdat any + +; ELF: $c2.[[MODULEID:[0-9a-f]+]] = comdat any +; COFF: $c2 = comdat any ; CHECK: @c1_c = global i32 0, comdat($c1) @c1_c = global i32 0, comdat($c1) -; CHECK: @c2_b = internal global i32 0{{$}} +;; $c2 has more than one member. Keep the comdat. +; ELF: @c2_b = internal global i32 0, comdat($c2.[[MODULEID]]) +; COFF: @c2_b = internal global i32 0, comdat($c2) @c2_b = global i32 0, comdat($c2) ; CHECK: @c3 = global i32 0, comdat{{$}} @@ -36,12 +45,14 @@ ret void } -; CHECK: define internal void @c2() { +; ELF: define internal void @c2() comdat($c2.[[MODULEID]]) { +; COFF: define internal void @c2() comdat { define internal void @c2() comdat { ret void } -; CHECK: define internal void @c2_a() { +; ELF: define internal void @c2_a() comdat($c2.[[MODULEID]]) { +; COFF: define internal void @c2_a() comdat($c2) { define void @c2_a() comdat($c2) { ret void } @@ -50,3 +61,15 @@ define void @c3_a() comdat($c3) { ret void } + +;; There is only one member in the comdat. Delete the comdat as a size optimization. +; CHECK: define internal void @c5() { +define void @c5() comdat($c5) { + ret void +} + +;; Add a non-comdat symbol to ensure the module ID is not empty so that we can +;; create unique comdat names. +define void @main() { + ret void +}