Index: include/llvm/Linker/Linker.h =================================================================== --- include/llvm/Linker/Linker.h +++ include/llvm/Linker/Linker.h @@ -11,6 +11,8 @@ #define LLVM_LINKER_LINKER_H #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IR/Value.h" +#include #include namespace llvm { @@ -41,13 +43,15 @@ /// If \p ErrorMsg is not null, information about any error is written /// to it. /// Returns true on error. - bool linkInModule(Module *Src, unsigned Mode, std::string *ErrorMsg); + bool linkInModule(Module *Src, unsigned Mode, std::string *ErrorMsg, + std::map *RefMap = nullptr); bool linkInModule(Module *Src, std::string *ErrorMsg) { return linkInModule(Src, Linker::DestroySource, ErrorMsg); } static bool LinkModules(Module *Dest, Module *Src, unsigned Mode, - std::string *ErrorMsg); + std::string *ErrorMsg, + std::map *RefMap = nullptr); private: Module *Composite; Index: include/llvm/Linker/ResolveLinker.h =================================================================== --- include/llvm/Linker/ResolveLinker.h +++ include/llvm/Linker/ResolveLinker.h @@ -0,0 +1,32 @@ +//===- ResolveLinker.h - bitcode module linker ----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This files implements the bitcode module linking by resolving symbol +// referected through libraries +// +//===----------------------------------------------------------------------===// + +#ifndef _RESOLVE_LINKER_HPP_ +#define _RESOLVE_LINKER_HPP_ +#include + +namespace llvm { + class Module; + + /// resolveLink - This function links an input module with the libraries + /// by resolving undefined symbols from these libraries. + /// Input module is modified to contain the linking result. If an error + /// occurs, true is returned and ErrorMsg (if not null) is set to indicate + /// the problem. Upon failure, the destination module could be in a modified + /// state, and shouldn't be relied on to be consistent. + /// Unresolved symbols may remain upon successful link. + bool resolveLink(llvm::Module* input, std::vector &libs, + std::string *ErrorMsg); +}; // namespace llvm +#endif // _RESOLVE_LINKER_HPP_ Index: lib/Linker/CMakeLists.txt =================================================================== --- lib/Linker/CMakeLists.txt +++ lib/Linker/CMakeLists.txt @@ -1,3 +1,4 @@ add_llvm_library(LLVMLinker LinkModules.cpp + ResolveLinker.cpp ) Index: lib/Linker/LinkModules.cpp =================================================================== --- lib/Linker/LinkModules.cpp +++ lib/Linker/LinkModules.cpp @@ -401,7 +401,10 @@ // Set of items not to link in from source. SmallPtrSet DoNotLinkFromSource; - + + // If symbol reference map is used only symbols from it will be linked in. + std::map *pReferenceMap; + // Vector of functions to lazily link in. std::vector LazilyLinkFunctions; @@ -411,10 +414,12 @@ std::string ErrorMsg; ModuleLinker(Module *dstM, TypeSet &Set, Module *srcM, unsigned mode, - bool SuppressWarnings=false) + bool SuppressWarnings=false, + std::map *pRefMap = nullptr) : DstM(dstM), SrcM(srcM), TypeMap(Set), ValMaterializer(TypeMap, DstM, LazilyLinkFunctions), Mode(mode), - SuppressWarnings(SuppressWarnings) {} + SuppressWarnings(SuppressWarnings), + pReferenceMap(pRefMap) {} bool run(); @@ -1012,6 +1017,8 @@ void ModuleLinker::linkAliasBodies() { for (Module::alias_iterator I = SrcM->alias_begin(), E = SrcM->alias_end(); I != E; ++I) { + if (pReferenceMap && !(*pReferenceMap)[I]) + continue; if (DoNotLinkFromSource.count(I)) continue; if (Constant *Aliasee = I->getAliasee()) { @@ -1252,15 +1259,21 @@ // function... We do this so that when we begin processing function bodies, // all of the global values that may be referenced are available in our // ValueMap. - for (Module::iterator I = SrcM->begin(), E = SrcM->end(); I != E; ++I) + for (Module::iterator I = SrcM->begin(), E = SrcM->end(); I != E; ++I) { + if (pReferenceMap && !(*pReferenceMap)[I]) + continue; if (linkFunctionProto(I)) return true; + } // If there were any aliases, link them now. for (Module::alias_iterator I = SrcM->alias_begin(), - E = SrcM->alias_end(); I != E; ++I) + E = SrcM->alias_end(); I != E; ++I) { + if (pReferenceMap && !(*pReferenceMap)[I]) + continue; if (linkAliasProto(I)) return true; + } for (unsigned i = 0, e = AppendingVars.size(); i != e; ++i) linkAppendingVarInit(AppendingVars[i]); @@ -1268,6 +1281,8 @@ // Link in the function bodies that are defined in the source module into // DstM. for (Module::iterator SF = SrcM->begin(), E = SrcM->end(); SF != E; ++SF) { + if (pReferenceMap && !(*pReferenceMap)[SF]) continue; + // Skip if not linking from source. if (DoNotLinkFromSource.count(SF)) continue; @@ -1372,9 +1387,10 @@ Composite = nullptr; } -bool Linker::linkInModule(Module *Src, unsigned Mode, std::string *ErrorMsg) { +bool Linker::linkInModule(Module *Src, unsigned Mode, std::string *ErrorMsg, + std::map *ReferenceMap) { ModuleLinker TheLinker(Composite, IdentifiedStructTypes, Src, Mode, - SuppressWarnings); + SuppressWarnings, ReferenceMap); if (TheLinker.run()) { if (ErrorMsg) *ErrorMsg = TheLinker.ErrorMsg; @@ -1393,9 +1409,10 @@ /// the problem. Upon failure, the Dest module could be in a modified state, /// and shouldn't be relied on to be consistent. bool Linker::LinkModules(Module *Dest, Module *Src, unsigned Mode, - std::string *ErrorMsg) { + std::string *ErrorMsg, + std::map *ReferenceMap) { Linker L(Dest); - return L.linkInModule(Src, Mode, ErrorMsg); + return L.linkInModule(Src, Mode, ErrorMsg, ReferenceMap); } //===----------------------------------------------------------------------===// Index: lib/Linker/ResolveLinker.cpp =================================================================== --- lib/Linker/ResolveLinker.cpp +++ lib/Linker/ResolveLinker.cpp @@ -0,0 +1,355 @@ +//===- ResolveLinker.cpp - bitcode module linker -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--------------------------------------------------------------------===// +// +// This files implements the bitcode module linking by resolving symbol +// referected through libraries +// +//===--------------------------------------------------------------------===// + +#include "llvm/IR/Constants.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/ValueSymbolTable.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Linker/Linker.h" +#include "llvm/Linker/ResolveLinker.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include + +using namespace llvm; + +// Linking all library modules with the application module uses "selective +// extraction", in which only functions that are referenced from the +// application get extracted into the final module. +// +// The selective extraction is achieved by Reference Map (ModuleRefMap). +// A Reference Map for the application module keeps track of all functions +// that are referenced from the application but not defined in the application +// module. +// +// Reference Map is implemented by the following three functions: +// InitReferenceMap(), AddReferences(), and AddFuncReferences(). It hanles both +// extern and local functions. + +// Get a callee function or an alias to a function. +static Value* GetCalledFunction(CallInst* CI) { + Value* Callee = CI->getCalledValue(); + if (Function *F = dyn_cast(Callee)) { + return F; + } + if (GlobalAlias *GAV = dyn_cast(Callee)) { + return GAV; + } + if (ConstantExpr *CE = dyn_cast(Callee)) { + if (CE->getOpcode() == Instruction::BitCast) { + if (Function *F = dyn_cast(CE->getOperand(0))) { + return F; + } + if (GlobalAlias *GAV = dyn_cast(CE->getOperand(0))) { + return GAV; + } + } + } + + // Indirect call, nothinng to link. + return 0; +} + +static void InitReferenceMap(llvm::Module *Src, + std::map &InExternFuncs, + std::list &ExternFuncs) { + for (Module::iterator SF = Src->begin(), E = Src->end(); SF != E; ++SF) { + Function *F = SF; + if (!F || !F->hasName()) + continue; + + // All no-body functions should be library functions. Initialize ExternFuncs + // with all those functions. + // + // Weak functions are also treated as no-body functions. + if (!F->hasLocalLinkage() && + (F->isDeclaration() || F->isWeakForLinker())) { + InExternFuncs[F->getName()] = true; + ExternFuncs.push_back(F->getName()); + } + } +} + +static void AddFuncReferences(llvm::Module *M, Function *SF, + std::map &InExternFuncs, + std::list &ExternFuncs, + std::map &ModuleRefMap) +{ + // Add this function's direct extern reference into ExternFuncs if it is not + // in ExternFuncs yet. For a local reference, invoke itself to visit that + // local function body if it has not been visited. ModuleRefMap tells which + // local functions have been visited already. + for (Function::iterator BB = SF->begin(), BE = SF->end(); BB != BE; ++BB) { + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + if (CallInst* CI = dyn_cast(&*I)) { + Value* Callee = GetCalledFunction(CI); + if (!Callee) + continue; + + Function *F; + if (GlobalAlias *GAV = dyn_cast(Callee)) { + assert(GAV && "invoke non-func/non-alias"); + ModuleRefMap[GAV] = true; + if (!GAV->hasLocalLinkage()) { + if (!InExternFuncs[GAV->getName()]) { + InExternFuncs[GAV->getName()] = true; + ExternFuncs.insert(ExternFuncs.end(), 1, GAV->getName()); + } + } + + GlobalValue *AV = const_cast(GAV->getAliasedGlobal()); + F = dyn_cast(AV); + } else { + F = dyn_cast(Callee); + } + assert (F && "Should invoke either a func or an alias to a func"); + + M->Materialize(F); + if (!F->hasLocalLinkage()) { + ModuleRefMap[F] = true; + if (!InExternFuncs[F->getName()]) { + InExternFuncs[F->getName()] = true; + ExternFuncs.insert(ExternFuncs.end(), 1, F->getName()); + } + } + else if (!ModuleRefMap[F]) { + ModuleRefMap[F] = true; + AddFuncReferences(M, F, InExternFuncs, ExternFuncs, ModuleRefMap); + } + } + } + } +} + +static void AddReferences(std::vector LibMs, + std::map &InExternFuncs, + std::map *ModuleRefMaps, + std::list &ExternFuncs) { + // ExternFuncs is a list of library functions referenced from the Application. + // Input: + // a list of the extern library functions directly referenced by the + // application. + // Ouput: + // a list of all extern library functions needed by the application (directl + // and indirectly referenced by the application). + // + // InExternFuncs is a helper map that simply returns true if an extern function + // is in ExternFuncs. + + // ModuleRefMaps[0:num-of-libs-1] + // ModuleRefMaps[i]: tells which functions in Library[i] should be in the final + // module. It includes both extern and local functions. + // + // The standard linker links one library at a time when there are more than + // one libraries to link. Here we use a different approach in which it first + // calculates which functions should be linked in from each library, then + // does the actual linking. + // + // For example, assume there are two libraries A and B, where A defines + // function f, which references function g; and B defines function g, which + // references function k. Both A and B defines function k (f, g, k are all + // externs). A is linked first, and is followed by B. + // + // Apps A B + // ------ ----- ------ + // use f def f def g + // use g use k + // def k def k + // + // This approach will extract f and k from A, and g from B. + // + // To calculate which functions should be extracted from which library, it + // starts from directly-referenced extern functions from the application + // module, and calculates their transitive closure. The following loop + // iterates over the ExternFuncs list and in each iteration, a function body + // is visited and every new extern function, if any, is added into the end + // of ExternFuncs (insertion does not invalidates std::list's iterator) so + // that the new function will be visited later. + // + // AddFuncReferences() actually visits a function's body. It adds any extern + // function in ExternFuncs. For any local function, it recursively call itself + // so that all local functions get visited. + + for (std::list::iterator it = ExternFuncs.begin(), + IE = ExternFuncs.end(); it != IE; ++it) { + std::string FName = *it; + + // In case there're multiple definitions to FName, only the first strong + // definition needs to be scanned (scan its function body). The libraries + // following this strong definition does not need to scan another function + // body if FName is defined again; but they do need to add FName to their + // reference map if FName is referenced. For this reason, we keep looping + // over all iterations and guard body-scanning with ScanFuncBody. + bool ScanFuncBody = true; + + // Search library in order + for (unsigned int i=0; i < LibMs.size(); i++) { + llvm::Module* Src = LibMs[i]; + ValueSymbolTable &SrcSymTab = Src->getValueSymbolTable(); + GlobalValue *SGV = cast_or_null(SrcSymTab.lookup(FName)); + if (SGV) { + // If SGV has local linkage, it is not the same as FName, skip it. + if (SGV->hasLocalLinkage()) + continue; + + // Found a declaration or definition for extern FName. + ModuleRefMaps[i][SGV] = true; + + Function* SF = 0; + if (GlobalAlias *GAV = dyn_cast(SGV)) { + // Don't allow multiple defs to any one alias + // (like Lib1:x = y; Lib2:x = z), + // nor an alias def and a normal def (like Lib1: x {}; Lib2:x=z). + // If this happens, linking may have a undefined behavior and LLVM + // may give an error message. + GlobalValue *AV = const_cast(GAV->getAliasedGlobal()); + SF = dyn_cast(AV); + Src->Materialize(SF); + + if (!SF->hasLocalLinkage()) { + ModuleRefMaps[i][SF] = true; + if (!InExternFuncs[SF->getName()]) { + InExternFuncs[SF->getName()] = true; + ExternFuncs.insert(IE, 1, SF->getName()); + } + } else if (!ModuleRefMaps[i][SF]) { + // It is local and not yet scanned, scan it now. + ModuleRefMaps[i][SF] = true; + AddFuncReferences(Src, SF, InExternFuncs, ExternFuncs, + ModuleRefMaps[i]); + } + + // If this definition of FName is weak, continue finding another + // definition. + if (SGV->isWeakForLinker()) + continue; + // For alias, don't set ScanFuncBody. We assume only one alias def + // for any symbol in the entire application (user code + libs). + } else if (SF = dyn_cast(SGV)) { + if (!ScanFuncBody) + continue; + + Src->Materialize(SF); + + if (!SF->isDeclaration()) + AddFuncReferences(Src, SF, InExternFuncs, ExternFuncs, + ModuleRefMaps[i]); + + // If this definition of FName is weak, continue finding another + // definition. + if (SF->isWeakForLinker()) + continue; + + // Found a strong definition, don't scan anymore. + if (!SF->isDeclaration()) + ScanFuncBody = false; + } else { + assert (false && "Unknown global references"); + } + } + } + } +} + +static void UnlinkGlobals(Module* M, + std::set AliveGlobals) { + std::vector DeadGlobals; + for (Module::global_iterator I = M->global_begin(), E = M->global_end(); + I != E; ++I) { + GlobalVariable* GVar = I; + if (GVar->use_empty() && (AliveGlobals.count(GVar) == 0)) { + DeadGlobals.push_back(GVar); + } + } + + for (int i=0, e = (int)DeadGlobals.size(); i < e; i++) { + M->getGlobalList().erase(DeadGlobals[i]); + } +} + +static bool linkWithModule(Module* Dst, llvm::Module* Src, + std::map *ModuleRefMap, + std::string *ErrorMsg) { + if (Linker::LinkModules(Dst, Src, llvm::Linker::PreserveSource, + ErrorMsg, ModuleRefMap)) { + return true; + } + +#ifndef NDEBUG + raw_string_ostream MsgsOS(*ErrorMsg); + if (verifyModule(*Dst, &MsgsOS)) { + return true; + } +#endif + return false; +} + +namespace llvm { + +/// resolveLink - This function links an input module with the libraries +/// by resolving undefined symbols from these libraries. +/// Input module is modified to contain the linking result. If an error +/// occurs, true is returned and ErrorMsg (if not null) is set to indicate +/// the problem. Upon failure, the destination module could be in a modified +/// state, and shouldn't be relied on to be consistent. +/// Unresolved symbols may remain upon successful link. +bool resolveLink(llvm::Module* input, std::vector &libs, + std::string *ErrorMsg) +{ + int ret = 0; + + std::list ExternFuncs; + std::map InExternFuncs; + std::string ErrorMessage; + + std::map + *modRefMaps = new std::map[libs.size()]; + + InitReferenceMap(input, InExternFuncs, ExternFuncs); + AddReferences(libs, InExternFuncs, modRefMaps, ExternFuncs); + + // A quick workaround to unlink useless globals from libraries. + std::set AliveGlobals; + for (Module::global_iterator I = input->global_begin(), + E = input->global_end(); I != E; ++I) { + llvm::GlobalVariable* GVar = I; + AliveGlobals.insert(GVar); + } + + // Link libraries to get every functions that are referenced. + for (unsigned int i=0; i < libs.size(); i++) { + llvm::Module* Library = libs[i]; + if (linkWithModule(input, Library, &modRefMaps[i], ErrorMsg)) { + delete[] modRefMaps; + return true; + } + + delete Library; + libs[i] = 0; + } + + delete[] modRefMaps; + + // Now, unlink those useless globals linked in from libraries. + UnlinkGlobals(input, AliveGlobals); + AliveGlobals.clear(); + + return false; +} +} // namespace llvm Index: test/Linker/resolve-a.ll =================================================================== --- test/Linker/resolve-a.ll +++ test/Linker/resolve-a.ll @@ -0,0 +1,11 @@ +; RUN: llvm-link %s -l%p/resolve-b.ll -S -o - | FileCheck %s + +; CHECK-NOT: unreferenced + +define void @foo() nounwind { +entry: + tail call void @bar() nounwind + ret void +} + +declare void @bar() nounwind Index: test/Linker/resolve-b.ll =================================================================== --- test/Linker/resolve-b.ll +++ test/Linker/resolve-b.ll @@ -0,0 +1,14 @@ +; This file is used by resolve-a.ll, so it doesn't actually do anything itself +; +; RUN: true + +define void @bar() nounwind { +entry: + ret void +} + +define void @unreferenced() nounwind { +entry: + ret void +} + Index: tools/llvm-link/llvm-link.cpp =================================================================== --- tools/llvm-link/llvm-link.cpp +++ tools/llvm-link/llvm-link.cpp @@ -8,11 +8,12 @@ //===----------------------------------------------------------------------===// // // This utility may be invoked in the following manner: -// llvm-link a.bc b.bc c.bc -o x.bc +// llvm-link a.bc b.bc c.bc -lc.bc -ld.bc -o x.bc // //===----------------------------------------------------------------------===// #include "llvm/Linker/Linker.h" +#include "llvm/Linker/ResolveLinker.h" #include "llvm/Bitcode/ReaderWriter.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" @@ -54,6 +55,10 @@ SuppressWarnings("suppress-warnings", cl::desc("Suppress all linking warnings"), cl::init(false)); +static cl::list Libraries("l", cl::Prefix, + cl::desc("Specify libraries to link to"), + cl::value_desc("library prefix")); + // LoadFile - Read the specified bitcode file in and return it. This routine // searches the link path for the specified file to try to find it... // @@ -98,7 +103,7 @@ return 1; } - if (Verbose) errs() << "Linking in '" << InputFilenames[i] << "'\n"; + if (Verbose) errs() << "Appending in '" << InputFilenames[i] << "'\n"; if (L.linkInModule(M.get(), &ErrorMessage)) { errs() << argv[0] << ": link error in '" << InputFilenames[i] @@ -107,6 +112,30 @@ } } + // Link unresolved symbols from libraries. + std::vector Libs; + for (std::vector::iterator i = Libraries.begin(), + e = Libraries.end(); i != e; ++i) { + std::auto_ptr M(LoadFile(argv[0], *i, Context)); + if (M.get() == 0) { + SMDiagnostic Err(*i, SourceMgr::DK_Error, "error loading file"); + Err.print(argv[0], errs()); + return 1; + } + if (Verbose) errs() << "Linking in '" << *i << "'\n"; + Libs.push_back(M.get()); + M.release(); + } + + if (Libs.size() > 0) { + std::string ErrorMsg; + if (resolveLink(Composite.get(), Libs, &ErrorMsg)) { + SMDiagnostic Err(InputFilenames[BaseArg], SourceMgr::DK_Error, ErrorMsg); + Err.print(argv[0], errs()); + return 1; + } + } + if (DumpAsm) errs() << "Here's the assembly:\n" << *Composite; std::string ErrorInfo;