Index: llvm/trunk/include/llvm/Bitcode/LLVMBitCodes.h =================================================================== --- llvm/trunk/include/llvm/Bitcode/LLVMBitCodes.h +++ llvm/trunk/include/llvm/Bitcode/LLVMBitCodes.h @@ -105,6 +105,9 @@ // METADATA_VALUES: [numvals] MODULE_CODE_METADATA_VALUES = 15, + + // SOURCE_FILENAME: [namechar x N] + MODULE_CODE_SOURCE_FILENAME = 16, }; /// PARAMATTR blocks have code for defining a parameter attribute set. @@ -172,7 +175,7 @@ VST_CODE_ENTRY = 1, // VST_ENTRY: [valueid, namechar x N] VST_CODE_BBENTRY = 2, // VST_BBENTRY: [bbid, namechar x N] VST_CODE_FNENTRY = 3, // VST_FNENTRY: [valueid, offset, namechar x N] - // VST_COMBINED_FNENTRY: [offset, namechar x N] + // VST_COMBINED_FNENTRY: [funcsumoffset, funcguid] VST_CODE_COMBINED_FNENTRY = 4 }; Index: llvm/trunk/include/llvm/IR/Function.h =================================================================== --- llvm/trunk/include/llvm/IR/Function.h +++ llvm/trunk/include/llvm/IR/Function.h @@ -27,6 +27,7 @@ #include "llvm/IR/GlobalObject.h" #include "llvm/IR/OperandTraits.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/MD5.h" namespace llvm { @@ -650,6 +651,12 @@ GlobalValue::LinkageTypes Linkage, StringRef FileName); + /// Return a 64-bit global unique ID constructed from global function name + /// (i.e. returned by getGlobalIdentifier). 
+ static uint64_t getGUID(StringRef GlobalFuncName) { + return MD5Hash(GlobalFuncName); + } + private: void allocHungoffUselist(); template void setHungoffOperand(Constant *C); Index: llvm/trunk/include/llvm/IR/FunctionInfo.h =================================================================== --- llvm/trunk/include/llvm/IR/FunctionInfo.h +++ llvm/trunk/include/llvm/IR/FunctionInfo.h @@ -18,6 +18,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" +#include "llvm/IR/Function.h" #include "llvm/IR/Module.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" @@ -146,8 +147,12 @@ /// COMDAT functions of the same name. typedef std::vector> FunctionInfoList; -/// Map from function name to corresponding function info structures. -typedef StringMap FunctionInfoMapTy; +/// Map from function GUID to corresponding function info structures. +/// Use a std::map rather than a DenseMap since it will likely incur +/// less overhead, as the value type is not very small and the size +/// of the map is unknown, resulting in inefficiencies due to repeated +/// insertions and resizing. +typedef std::map FunctionInfoMapTy; /// Type used for iterating through the function info map. typedef FunctionInfoMapTy::const_iterator const_funcinfo_iterator; @@ -184,17 +189,21 @@ /// Get the list of function info objects for a given function. const FunctionInfoList &getFunctionInfoList(StringRef FuncName) { - return FunctionMap[FuncName]; + return FunctionMap[Function::getGUID(FuncName)]; } /// Get the list of function info objects for a given function. const const_funcinfo_iterator findFunctionInfoList(StringRef FuncName) const { - return FunctionMap.find(FuncName); + return FunctionMap.find(Function::getGUID(FuncName)); } /// Add a function info for a function of the given name. 
void addFunctionInfo(StringRef FuncName, std::unique_ptr Info) { - FunctionMap[FuncName].push_back(std::move(Info)); + FunctionMap[Function::getGUID(FuncName)].push_back(std::move(Info)); + } + + void addFunctionInfo(uint64_t FuncGUID, std::unique_ptr Info) { + FunctionMap[FuncGUID].push_back(std::move(Info)); } /// Iterator to allow writer to walk through table during emission. Index: llvm/trunk/include/llvm/IR/Module.h =================================================================== --- llvm/trunk/include/llvm/IR/Module.h +++ llvm/trunk/include/llvm/IR/Module.h @@ -170,6 +170,8 @@ std::unique_ptr Materializer; ///< Used to materialize GlobalValues std::string ModuleID; ///< Human readable identifier for the module + std::string SourceFileName; ///< Original source file name for module, + ///< recorded in bitcode. std::string TargetTriple; ///< Platform target triple Module compiled on ///< Format: (arch)(sub)-(vendor)-(sys0-(abi) void *NamedMDSymTab; ///< NamedMDNode names. @@ -195,6 +197,12 @@ /// @returns the module identifier as a string const std::string &getModuleIdentifier() const { return ModuleID; } + /// Get the module's original source file name. When compiling from + /// bitcode, this is taken from a bitcode record where it was recorded. + /// For other compiles it is the same as the ModuleID, which would + /// contain the source file name. + const std::string &getSourceFileName() const { return SourceFileName; } + /// \brief Get a short "name" for the module. /// /// This is useful for debugging or logging. It is essentially a convenience @@ -240,6 +248,9 @@ /// Set the module identifier. void setModuleIdentifier(StringRef ID) { ModuleID = ID; } + /// Set the module's original source file name. 
+ void setSourceFileName(StringRef Name) { SourceFileName = Name; } + /// Set the data layout void setDataLayout(StringRef Desc); void setDataLayout(const DataLayout &Other); Index: llvm/trunk/lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- llvm/trunk/lib/Bitcode/Reader/BitcodeReader.cpp +++ llvm/trunk/lib/Bitcode/Reader/BitcodeReader.cpp @@ -458,6 +458,9 @@ /// summary records. DenseMap ModuleIdMap; + /// Original source file name recorded in a bitcode record. + std::string SourceFileName; + public: std::error_code error(BitcodeError E, const Twine &Message); std::error_code error(BitcodeError E); @@ -3697,6 +3700,13 @@ assert(MetadataList.size() == 0); MetadataList.resize(NumModuleMDs); break; + /// MODULE_CODE_SOURCE_FILENAME: [namechar x N] + case bitc::MODULE_CODE_SOURCE_FILENAME: + SmallString<128> ValueName; + if (convertToString(Record, 0, ValueName)) + return error("Invalid record"); + TheModule->setSourceFileName(ValueName); + break; } Record.clear(); } @@ -5454,24 +5464,31 @@ return error("Invalid record"); unsigned ValueID = Record[0]; uint64_t FuncOffset = Record[1]; - std::unique_ptr FuncInfo = - llvm::make_unique(FuncOffset); - if (foundFuncSummary() && !IsLazy) { + assert(!IsLazy && "Lazy summary read only supported for combined index"); + // Gracefully handle bitcode without a function summary section, + // which will simply not populate the index. 
+ if (foundFuncSummary()) { DenseMap>::iterator SMI = SummaryMap.find(ValueID); assert(SMI != SummaryMap.end() && "Summary info not found"); + std::unique_ptr FuncInfo = + llvm::make_unique(FuncOffset); FuncInfo->setFunctionSummary(std::move(SMI->second)); + assert(!SourceFileName.empty()); + TheIndex->addFunctionInfo( + Function::getGlobalIdentifier( + ValueName, FuncInfo->functionSummary()->getFunctionLinkage(), + SourceFileName), + std::move(FuncInfo)); } - TheIndex->addFunctionInfo(ValueName, std::move(FuncInfo)); ValueName.clear(); break; } case bitc::VST_CODE_COMBINED_FNENTRY: { - // VST_CODE_FNENTRY: [offset, namechar x N] - if (convertToString(Record, 1, ValueName)) - return error("Invalid record"); + // VST_CODE_COMBINED_FNENTRY: [funcsumoffset, funcguid] uint64_t FuncSummaryOffset = Record[0]; + uint64_t FuncGUID = Record[1]; std::unique_ptr FuncInfo = llvm::make_unique(FuncSummaryOffset); if (foundFuncSummary() && !IsLazy) { @@ -5480,7 +5497,7 @@ assert(SMI != SummaryMap.end() && "Summary info not found"); FuncInfo->setFunctionSummary(std::move(SMI->second)); } - TheIndex->addFunctionInfo(ValueName, std::move(FuncInfo)); + TheIndex->addFunctionInfo(FuncGUID, std::move(FuncInfo)); ValueName.clear(); break; @@ -5499,6 +5516,8 @@ if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) return error("Invalid record"); + SmallVector Record; + // Read the function index for this module. while (1) { BitstreamEntry Entry = Stream.advance(); @@ -5551,7 +5570,24 @@ continue; case BitstreamEntry::Record: - Stream.skipRecord(Entry.ID); + // Once we find the single record of interest, skip the rest. + if (!SourceFileName.empty()) + Stream.skipRecord(Entry.ID); + else { + Record.clear(); + auto BitCode = Stream.readRecord(Entry.ID, Record); + switch (BitCode) { + default: + break; // Default behavior, ignore unknown content. 
+ /// MODULE_CODE_SOURCE_FILENAME: [namechar x N] + case bitc::MODULE_CODE_SOURCE_FILENAME: + SmallString<128> ValueName; + if (convertToString(Record, 0, ValueName)) + return error("Invalid record"); + SourceFileName = ValueName.c_str(); + break; + } + } continue; } } Index: llvm/trunk/lib/Bitcode/Writer/BitcodeWriter.cpp =================================================================== --- llvm/trunk/lib/Bitcode/Writer/BitcodeWriter.cpp +++ llvm/trunk/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -618,6 +618,24 @@ return Stream.GetCurrentBitNo() - 32; } +enum StringEncoding { SE_Char6, SE_Fixed7, SE_Fixed8 }; + +/// Determine the encoding to use for the given string name and length. +static StringEncoding getStringEncoding(const char *Str, unsigned StrLen) { + bool isChar6 = true; + for (const char *C = Str, *E = C + StrLen; C != E; ++C) { + if (isChar6) + isChar6 = BitCodeAbbrevOp::isChar6(*C); + if ((unsigned char)*C & 128) + // don't bother scanning the rest. + return SE_Fixed8; + } + if (isChar6) + return SE_Char6; + else + return SE_Fixed7; +} + /// Emit top-level description of module, including target triple, inline asm, /// descriptors for global variables, and function prototype info. /// Returns the bit offset to backpatch with the location of the real VST. @@ -791,13 +809,40 @@ // function importing where we lazy load the metadata as a postpass, // we want to avoid parsing the module-level metadata before parsing // the imported functions. 
- BitCodeAbbrev *Abbv = new BitCodeAbbrev(); - Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_METADATA_VALUES)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); - unsigned MDValsAbbrev = Stream.EmitAbbrev(Abbv); - Vals.push_back(VE.numMDs()); - Stream.EmitRecord(bitc::MODULE_CODE_METADATA_VALUES, Vals, MDValsAbbrev); - Vals.clear(); + { + BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_METADATA_VALUES)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); + unsigned MDValsAbbrev = Stream.EmitAbbrev(Abbv); + Vals.push_back(VE.numMDs()); + Stream.EmitRecord(bitc::MODULE_CODE_METADATA_VALUES, Vals, MDValsAbbrev); + Vals.clear(); + } + + // Emit the module's source file name, abbreviated per its own characters. + { + StringEncoding Bits = + getStringEncoding(M->getSourceFileName().data(), M->getSourceFileName().size()); + BitCodeAbbrevOp AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8); + if (Bits == SE_Char6) + AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Char6); + else if (Bits == SE_Fixed7) + AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7); + + // MODULE_CODE_SOURCE_FILENAME: [namechar x N] + BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_SOURCE_FILENAME)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(AbbrevOpToUse); + unsigned FilenameAbbrev = Stream.EmitAbbrev(Abbv); + + for (const auto P : M->getSourceFileName()) + Vals.push_back((unsigned char)P); + + // Emit the finished record. + Stream.EmitRecord(bitc::MODULE_CODE_SOURCE_FILENAME, Vals, FilenameAbbrev); + Vals.clear(); + } uint64_t VSTOffsetPlaceholder = WriteValueSymbolTableForwardDecl(M->getValueSymbolTable(), Stream); @@ -2195,24 +2240,6 @@ Vals.clear(); } -enum StringEncoding { SE_Char6, SE_Fixed7, SE_Fixed8 }; - -/// Determine the encoding to use for the given string name and length. 
-static StringEncoding getStringEncoding(const char *Str, unsigned StrLen) { - bool isChar6 = true; - for (const char *C = Str, *E = C + StrLen; C != E; ++C) { - if (isChar6) - isChar6 = BitCodeAbbrevOp::isChar6(*C); - if ((unsigned char)*C & 128) - // don't bother scanning the rest. - return SE_Fixed8; - } - if (isChar6) - return SE_Char6; - else - return SE_Fixed7; -} - /// Emit names for globals/functions etc. The VSTOffsetPlaceholder, /// BitcodeStartBit and FunctionIndex are only passed for the module-level /// VST, where we are including a function bitcode index and need to @@ -2352,51 +2379,24 @@ BitstreamWriter &Stream) { Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4); - // 8-bit fixed-width VST_CODE_COMBINED_FNENTRY function strings. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_FNENTRY)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); - unsigned FnEntry8BitAbbrev = Stream.EmitAbbrev(Abbv); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcsumoffset + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcguid + unsigned FnEntryAbbrev = Stream.EmitAbbrev(Abbv); - // 7-bit fixed width VST_CODE_COMBINED_FNENTRY function strings. - Abbv = new BitCodeAbbrev(); - Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_FNENTRY)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); - unsigned FnEntry7BitAbbrev = Stream.EmitAbbrev(Abbv); - - // 6-bit char6 VST_CODE_COMBINED_FNENTRY function strings. 
- Abbv = new BitCodeAbbrev(); - Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_FNENTRY)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); - unsigned FnEntry6BitAbbrev = Stream.EmitAbbrev(Abbv); - - // FIXME: We know if the type names can use 7-bit ascii. - SmallVector NameVals; + SmallVector NameVals; for (const auto &FII : Index) { - for (const auto &FI : FII.getValue()) { + for (const auto &FI : FII.second) { NameVals.push_back(FI->bitcodeIndex()); - StringRef FuncName = FII.first(); - - // Figure out the encoding to use for the name. - StringEncoding Bits = getStringEncoding(FuncName.data(), FuncName.size()); + uint64_t FuncGUID = FII.first; - // VST_CODE_COMBINED_FNENTRY: [funcsumoffset, namechar x N] - unsigned AbbrevToUse = FnEntry8BitAbbrev; - if (Bits == SE_Char6) - AbbrevToUse = FnEntry6BitAbbrev; - else if (Bits == SE_Fixed7) - AbbrevToUse = FnEntry7BitAbbrev; + // VST_CODE_COMBINED_FNENTRY: [funcsumoffset, funcguid] + unsigned AbbrevToUse = FnEntryAbbrev; - for (const auto P : FuncName) - NameVals.push_back((unsigned char)P); + NameVals.push_back(FuncGUID); // Emit the finished record. 
Stream.EmitRecord(bitc::VST_CODE_COMBINED_FNENTRY, NameVals, AbbrevToUse); @@ -2855,7 +2855,7 @@ SmallVector NameVals; for (const auto &FII : I) { - for (auto &FI : FII.getValue()) { + for (auto &FI : FII.second) { FunctionSummary *FS = FI->functionSummary(); assert(FS); Index: llvm/trunk/lib/IR/FunctionInfo.cpp =================================================================== --- llvm/trunk/lib/IR/FunctionInfo.cpp +++ llvm/trunk/lib/IR/FunctionInfo.cpp @@ -23,7 +23,7 @@ StringRef ModPath; for (auto &OtherFuncInfoLists : *Other) { - std::string FuncName = OtherFuncInfoLists.getKey(); + uint64_t FuncGUID = OtherFuncInfoLists.first; FunctionInfoList &List = OtherFuncInfoLists.second; // Assert that the func info list only has one entry, since we shouldn't @@ -49,20 +49,9 @@ // string reference owned by the combined index. Info->functionSummary()->setModulePath(ModPath); - // If it is a local function, rename it. - if (GlobalValue::isLocalLinkage( - Info->functionSummary()->getFunctionLinkage())) { - // Any local functions are virtually renamed when being added to the - // combined index map, to disambiguate from other functions with - // the same name. The symbol table created for the combined index - // file should contain the renamed symbols. - FuncName = - FunctionInfoIndex::getGlobalNameForLocal(FuncName, NextModuleId); - } - // Add new function info to existing list. There may be duplicates when // combining FunctionMap entries, due to COMDAT functions. Any local - // functions were virtually renamed above. - addFunctionInfo(FuncName, std::move(Info)); + // functions were given unique global IDs. 
+ addFunctionInfo(FuncGUID, std::move(Info)); } } Index: llvm/trunk/lib/IR/Module.cpp =================================================================== --- llvm/trunk/lib/IR/Module.cpp +++ llvm/trunk/lib/IR/Module.cpp @@ -47,7 +47,7 @@ // Module::Module(StringRef MID, LLVMContext &C) - : Context(C), Materializer(), ModuleID(MID), DL("") { + : Context(C), Materializer(), ModuleID(MID), SourceFileName(MID), DL("") { ValSymTab = new ValueSymbolTable(); NamedMDSymTab = new StringMap(); Context.addModule(this); Index: llvm/trunk/lib/Transforms/IPO/FunctionImport.cpp =================================================================== --- llvm/trunk/lib/Transforms/IPO/FunctionImport.cpp +++ llvm/trunk/lib/Transforms/IPO/FunctionImport.cpp @@ -126,7 +126,11 @@ if (CalledFunction->hasInternalLinkage()) { ImportedName = Renamed; } - auto It = CalledFunctions.insert(ImportedName); + // Compute the global identifier used in the function index. + auto CalledFunctionGlobalID = Function::getGlobalIdentifier( + CalledFunction->getName(), CalledFunction->getLinkage(), + CalledFunction->getParent()->getSourceFileName()); + auto It = CalledFunctions.insert(CalledFunctionGlobalID); if (!It.second) { // This is a call to a function we already considered, skip. continue; @@ -213,14 +217,12 @@ GlobalValue *SGV = SrcModule.getNamedValue(CalledFunctionName); if (!SGV) { - // The destination module is referencing function using their renamed name - // when importing a function that was originally local in the source - // module. The source module we have might not have been renamed so we try - // to remove the suffix added during the renaming to recover the original + // The function is referenced by a global identifier, which has the + // source file name prepended for functions that were originally local + // in the source module. Strip any prepended name to recover the original // name in the source module. 
- std::pair Split = - CalledFunctionName.split(".llvm."); - SGV = SrcModule.getNamedValue(Split.first); + std::pair Split = CalledFunctionName.split(":"); + SGV = SrcModule.getNamedValue(Split.second); assert(SGV && "Can't find function to import in source module"); } if (!SGV) { Index: llvm/trunk/test/Bitcode/source-filename.test =================================================================== --- llvm/trunk/test/Bitcode/source-filename.test +++ llvm/trunk/test/Bitcode/source-filename.test @@ -0,0 +1,2 @@ +; RUN: llvm-bcanalyzer -dump %p/Inputs/source-filename.bc | FileCheck %s +; CHECK: