Index: CMakeLists.txt =================================================================== --- CMakeLists.txt +++ CMakeLists.txt @@ -97,3 +97,4 @@ add_subdirectory(docs) add_subdirectory(COFF) +add_subdirectory(ELF) Index: ELF/CMakeLists.txt =================================================================== --- /dev/null +++ ELF/CMakeLists.txt @@ -0,0 +1,23 @@ +set(LLVM_TARGET_DEFINITIONS Options.td) +tablegen(LLVM Options.inc -gen-opt-parser-defs) +add_public_tablegen_target(ELFOptionsTableGen) + +add_llvm_library(lldELF2 + Chunks.cpp + Driver.cpp + DriverUtils.cpp + InputFiles.cpp + SymbolTable.cpp + Symbols.cpp + Writer.cpp + + LINK_COMPONENTS + ${LLVM_TARGETS_TO_BUILD} + Core + LTO + MC + MCDisassembler + Support + ) + +add_dependencies(lldELF2 ELFOptionsTableGen) Index: ELF/Chunks.h =================================================================== --- /dev/null +++ ELF/Chunks.h @@ -0,0 +1,152 @@ +//===- Chunks.h -----------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_CHUNKS_H +#define LLD_ELF_CHUNKS_H + +#include "lld/Core/LLVM.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Object/ELF.h" +#include +#include + +namespace lld { +namespace elfv2 { + +class Defined; +template class ObjectFile; +class OutputSection; + +// A Chunk represents a chunk of data that will occupy space in the +// output (if the resolver chose that). It may or may not be backed by +// a section of an input file. It could be linker-created data, or +// doesn't even have actual data (if common or bss). +class Chunk { +public: + virtual ~Chunk() = default; + + // Returns the size of this chunk (even if this is a common or BSS.) 
+ virtual size_t getSize() const = 0; + + // Write this chunk to a mmap'ed file, assuming Buf is pointing to + // beginning of the file. Because this function may use VA values + // of other chunks for relocations, you need to set them properly + // before calling this function. + virtual void writeTo(uint8_t *Buf) {} + + // The writer sets and uses the addresses. + uint64_t getVA() { return VA; } + uint64_t getFileOff() { return FileOff; } + uint32_t getAlign() { return Align; } + void setVA(uint64_t V) { VA = V; } + void setFileOff(uint64_t V) { FileOff = V; } + + // Returns true if this has non-zero data. BSS chunks return + // false. If false is returned, the space occupied by this chunk + // will be filled with zeros. + virtual bool hasData() const { return true; } + + // Returns readable/writable/executable bits. + virtual uint32_t getFlags() const { return 0; } + + // Returns the section name if this is a section chunk. + // It is illegal to call this function on non-section chunks. + virtual StringRef getSectionName() const { + llvm_unreachable("unimplemented getSectionName"); + } + + // Called if the garbage collector decides to not include this chunk + // in a final output. It's supposed to print out a log message to stdout. + // It is illegal to call this function on non-section chunks because + // only section chunks are subject of garbage collection. + virtual void printDiscardedMessage() { + llvm_unreachable("unimplemented printDiscardedMessage"); + } + + // Used by the garbage collector. + bool isRoot() { return Root; } + bool isLive() { return Live; } + void markLive() { + if (!Live) + mark(); + } + + // An output section has pointers to chunks in the section, and each + // chunk has a back pointer to an output section. + void setOutputSection(OutputSection *O) { Out = O; } + OutputSection *getOutputSection() { return Out; } + +protected: + // The VA of this chunk in the output. The writer sets a value. 
+ uint64_t VA = 0; + + // The offset from beginning of the output file. The writer sets a value. + uint64_t FileOff = 0; + + // The output section for this chunk. + OutputSection *Out = nullptr; + + // The alignment of this chunk. The writer uses the value. + uint32_t Align = 1; + + // Used by the garbage collector. + virtual void mark() {} + bool Live = true; + bool Root = false; +}; + +// A chunk corresponding to a section of an input file. +template class SectionChunk : public Chunk { + typedef llvm::object::Elf_Shdr_Impl Elf_Shdr; + typedef llvm::object::Elf_Rel_Impl Elf_Rela; + typedef llvm::object::Elf_Rel_Impl Elf_Rel; + +public: + SectionChunk(ObjectFile *File, const Elf_Shdr *Header, + uint32_t SectionIndex); + size_t getSize() const override { return Header->sh_size; } + void writeTo(uint8_t *Buf) override; + bool hasData() const override; + uint32_t getFlags() const override; + StringRef getSectionName() const override { return SectionName; } + void printDiscardedMessage() override; + +private: + void mark() override; + const Elf_Shdr *getSectionHdr(); + void applyReloc(uint8_t *Buf, const Elf_Rela *Rel); + void applyReloc(uint8_t *Buf, const Elf_Rel *Rel); + + // A file this chunk was created from. + ObjectFile *File; + + const Elf_Shdr *Header; + uint32_t SectionIndex; + StringRef SectionName; +}; + +// A chunk for common symbols. Common chunks don't have actual data.
+template class CommonChunk : public Chunk { + typedef llvm::object::Elf_Sym_Impl Elf_Sym; + +public: + CommonChunk(const Elf_Sym *Sym); + size_t getSize() const override { return Sym->st_size; } + bool hasData() const override { return false; } + uint32_t getFlags() const override; + StringRef getSectionName() const override { return ".bss"; } + +private: + const Elf_Sym *Sym; +}; + +} // namespace elfv2 +} // namespace lld + +#endif Index: ELF/Chunks.cpp =================================================================== --- /dev/null +++ ELF/Chunks.cpp @@ -0,0 +1,126 @@ +//===- Chunks.cpp ---------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Chunks.h" +#include "InputFiles.h" +#include "Writer.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace llvm::object; +using namespace llvm::support::endian; +using namespace llvm::ELF; + +using namespace lld; +using namespace lld::elfv2; + +template +SectionChunk::SectionChunk(elfv2::ObjectFile *F, const Elf_Shdr *H, + uint32_t SI) + : File(F), Header(H), SectionIndex(SI) { + // Initialize SectionName. + SectionName = *File->getObj()->getSectionName(Header); + + Align = Header->sh_addralign; + + // When a new chunk is created, we don't know if it's going to make it + // to the final output. Initially all sections are unmarked in terms + // of garbage collection. The writer will call markLive() to mark + // all reachable section chunks. + Live = false; + + Root = true; +} + +template void SectionChunk::writeTo(uint8_t *Buf) { + if (!hasData()) + return; + // Copy section contents from source object file to output file.
+ ArrayRef Data = *File->getObj()->getSectionContents(Header); + memcpy(Buf + FileOff, Data.data(), Data.size()); + + // FIXME: Relocations +} + +template void SectionChunk::mark() { + assert(!Live); + Live = true; + + // Mark all symbols listed in the relocation table for this section. + // FIXME: Relocations +} + +template +void SectionChunk::applyReloc(uint8_t *Buf, const Elf_Rela *Rel) { + // FIXME: Relocations +} + +template +void SectionChunk::applyReloc(uint8_t *Buf, const Elf_Rel *Rel) {} + +template bool SectionChunk::hasData() const { + return Header->sh_type != SHT_NOBITS; +} + +template uint32_t SectionChunk::getFlags() const { + return Header->sh_flags; +} + +// Prints a "Discarded ..." line for each function symbol in this section. +template void SectionChunk::printDiscardedMessage() { + auto Obj = File->getObj(); + + for (auto &&Sym : Obj->symbols()) { + auto Sec = Obj->getSection(&Sym); + // Skip symbols whose section lookup failed or is not this section. + if (!Sec || *Sec != Header) + continue; + if (Sym.getType() != STT_FUNC) + continue; + if (auto Name = Obj->getStaticSymbolName(&Sym)) { + llvm::outs() << "Discarded " << *Name << " from " << File->getShortName() + << "\n"; + } + } +} + +template +const llvm::object::Elf_Shdr_Impl *SectionChunk::getSectionHdr() { + return Header; +} + +template +CommonChunk::CommonChunk(const Elf_Sym *S) + : Sym(S) { + // A common symbol belongs to no section. Per the ELF specification its + // st_value holds the required alignment and its st_size holds the size. + // Guard against 0 so the writer never sees a zero alignment.
+ Align = Sym->getValue() ? static_cast<uint32_t>(Sym->getValue()) : 1; +} + +template uint32_t CommonChunk::getFlags() const { + return PF_R | PF_W; +} + +namespace lld { +namespace elfv2 { +template class SectionChunk; +template class SectionChunk; +template class SectionChunk; +template class SectionChunk; + +template class CommonChunk; +template class CommonChunk; +template class CommonChunk; +template class CommonChunk; +} +} Index: ELF/Config.h =================================================================== --- /dev/null +++ ELF/Config.h @@ -0,0 +1,40 @@ +//===- Config.h -----------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_CONFIG_H +#define LLD_ELF_CONFIG_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/ELF.h" +#include +#include + +namespace lld { +namespace elfv2 { + +using llvm::StringRef; + +class Configuration { +public: + int MachineArchitecture = llvm::ELF::EM_X86_64; + bool Verbose = false; + StringRef EntryName; + std::string OutputFile; + bool DoGC = true; + + // Symbols in this set are considered as live by the garbage collector. + std::set GCRoots; +}; + +extern Configuration *Config; + +} // namespace elfv2 +} // namespace lld + +#endif Index: ELF/Driver.h =================================================================== --- /dev/null +++ ELF/Driver.h @@ -0,0 +1,102 @@ +//===- Driver.h -----------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_DRIVER_H +#define LLD_ELF_DRIVER_H + +#include "lld/Core/LLVM.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/ELF.h" +#include "llvm/Option/Arg.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Support/StringSaver.h" +#include +#include +#include +#include + +namespace lld { +namespace elfv2 { + +class LinkerDriver; +extern LinkerDriver *Driver; + +using llvm::Optional; +class InputFile; + +// Entry point of the ELF linker. +bool link(llvm::ArrayRef Args); + +class ArgParser { +public: + ArgParser() : Alloc(AllocAux) {} + // Parses command line options. + ErrorOr parse(llvm::ArrayRef Args); + + // Tokenizes a given string and then parses as command line options. + ErrorOr parse(StringRef S) { + return parse(tokenize(S)); + } + +private: + ErrorOr parse(std::vector Argv); + + std::vector tokenize(StringRef S); + + ErrorOr> + replaceResponseFiles(std::vector); + + llvm::BumpPtrAllocator AllocAux; + llvm::BumpPtrStringSaver Alloc; +}; + +class LinkerDriver { +public: + LinkerDriver() : Alloc(AllocAux) {} + bool link(llvm::ArrayRef Args); + +private: + llvm::BumpPtrAllocator AllocAux; + llvm::BumpPtrStringSaver Alloc; + ArgParser Parser; + + // Opens a file. Path has to be resolved already. + ErrorOr openFile(StringRef Path); + + // Searches a file from search paths. + Optional findFile(StringRef Filename); + Optional findLib(StringRef Filename); + StringRef doFindFile(StringRef Filename); + StringRef doFindLib(StringRef Filename); + + std::vector SearchPaths; + std::set VisitedFiles; + + // Driver is the owner of all opened files. + // InputFiles have MemoryBufferRefs to them. + std::vector> OwningMBs; +}; + +// Functions below this line are defined in DriverUtils.cpp. 
+ +void printHelp(const char *Argv0); + +// Create enum with OPT_xxx values for each option in Options.td +enum { + OPT_INVALID = 0, +#define OPTION(_1, _2, ID, _4, _5, _6, _7, _8, _9, _10, _11) OPT_##ID, +#include "Options.inc" +#undef OPTION +}; + +} // namespace elfv2 +} // namespace lld + +#endif Index: ELF/Driver.cpp =================================================================== --- /dev/null +++ ELF/Driver.cpp @@ -0,0 +1,229 @@ +//===- Driver.cpp ---------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Config.h" +#include "Driver.h" +#include "InputFiles.h" +#include "SymbolTable.h" +#include "Writer.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/LibDriver/LibDriver.h" +#include "llvm/Option/Arg.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Option/Option.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/raw_ostream.h" +#include +#include + +using namespace llvm; +using llvm::sys::fs::file_magic; +using llvm::sys::fs::identify_magic; + +using namespace lld; +using namespace lld::elfv2; + +namespace lld { +namespace elfv2 { +Configuration *Config; +LinkerDriver *Driver; + +bool link(llvm::ArrayRef Args) { + auto C = make_unique(); + Config = C.get(); + auto D = make_unique(); + Driver = D.get(); + return Driver->link(Args); +} +} +} + +// Drop directory components and replace extension with ".exe". +static std::string getOutputPath(StringRef Path) { + auto P = Path.find_last_of("\\/"); + StringRef S = (P == StringRef::npos) ? 
Path : Path.substr(P + 1); + return (S.substr(0, S.rfind('.')) + ".exe").str(); +} + +// Opens a file. Path has to be resolved already. +// Newly created memory buffers are owned by this driver. +ErrorOr LinkerDriver::openFile(StringRef Path) { + auto MBOrErr = MemoryBuffer::getFile(Path); + if (auto EC = MBOrErr.getError()) + return EC; + std::unique_ptr MB = std::move(MBOrErr.get()); + MemoryBufferRef MBRef = MB->getMemBufferRef(); + OwningMBs.push_back(std::move(MB)); // take ownership + return MBRef; +} + +static std::unique_ptr createFile(MemoryBufferRef MB) { + // File type is detected by contents, not by file extension. + file_magic Magic = identify_magic(MB.getBuffer()); + if (Magic == file_magic::archive) + return std::unique_ptr(new ArchiveFile(MB)); + if (Magic == file_magic::bitcode) + return std::unique_ptr(new BitcodeFile(MB)); + if (Config->OutputFile == "") + Config->OutputFile = getOutputPath(MB.getBufferIdentifier()); + return std::unique_ptr(new ObjectFile(MB)); +} + +// Find file from search paths. You can omit ".obj", this function takes +// care of that. Note that the returned path is not guaranteed to exist. +StringRef LinkerDriver::doFindFile(StringRef Filename) { + bool hasPathSep = (Filename.find_first_of("/\\") != StringRef::npos); + if (hasPathSep) + return Filename; + bool hasExt = (Filename.find('.') != StringRef::npos); + for (StringRef Dir : SearchPaths) { + SmallString<128> Path = Dir; + llvm::sys::path::append(Path, Filename); + if (llvm::sys::fs::exists(Path.str())) + return Alloc.save(Path.str()); + if (!hasExt) { + Path.append(".obj"); + if (llvm::sys::fs::exists(Path.str())) + return Alloc.save(Path.str()); + } + } + return Filename; +} + +// Resolves a file path. This never returns the same path +// (in that case, it returns None). 
+Optional LinkerDriver::findFile(StringRef Filename) { + StringRef Path = doFindFile(Filename); + bool Seen = !VisitedFiles.insert(Path.str()).second; + if (Seen) + return None; + return Path; +} + +// Find library file from search path. +StringRef LinkerDriver::doFindLib(StringRef Filename) { + // Add ".lib" to Filename if that has no file extension. + bool hasExt = (Filename.find('.') != StringRef::npos); + if (!hasExt) + Filename = Alloc.save(Filename + ".lib"); + return doFindFile(Filename); +} + +// Resolves a library path. Visited paths are compared exactly (no case +// folding), because "Foo.a" and "foo.a" can be different files. This never +// returns the same path twice (in that case, it returns None). +Optional LinkerDriver::findLib(StringRef Filename) { + StringRef Path = doFindLib(Filename); + bool Seen = !VisitedFiles.insert(Path.str()).second; + if (Seen) + return None; + return Path; +} + +bool LinkerDriver::link(llvm::ArrayRef ArgsArr) { + // Needed for LTO. + llvm::InitializeAllTargetInfos(); + llvm::InitializeAllTargets(); + llvm::InitializeAllTargetMCs(); + llvm::InitializeAllAsmParsers(); + llvm::InitializeAllAsmPrinters(); + llvm::InitializeAllDisassemblers(); + + // Parse command line options. + auto ArgsOrErr = Parser.parse(ArgsArr); + if (auto EC = ArgsOrErr.getError()) { + llvm::errs() << EC.message() << "\n"; + return false; + } + llvm::opt::InputArgList Args = std::move(ArgsOrErr.get()); + + // Handle the help option. + if (Args.hasArg(OPT_help)) { + printHelp(ArgsArr[0]); + return true; + } + + if (Args.filtered_begin(OPT_INPUT) == Args.filtered_end()) { + llvm::errs() << "no input files.\n"; + return false; + } + + // Construct search path list. + SearchPaths.push_back(""); + for (auto *Arg : Args.filtered(OPT_L)) + SearchPaths.push_back(Arg->getValue()); + + // Handle the output file option. + if (auto *Arg = Args.getLastArg(OPT_output)) + Config->OutputFile = Arg->getValue(); + + // Handle the entry point option. + if (auto *Arg = Args.getLastArg(OPT_e)) + Config->EntryName = Arg->getValue(); + + // Create a list of input files from the positional (OPT_INPUT)
arguments. Paths that findFile() has already seen are skipped. + std::vector InputPaths; + std::vector Inputs; + for (auto *Arg : Args.filtered(OPT_INPUT)) + if (Optional Path = findFile(Arg->getValue())) + InputPaths.push_back(*Path); + + for (StringRef Path : InputPaths) { + ErrorOr MBOrErr = openFile(Path); + if (auto EC = MBOrErr.getError()) { + llvm::errs() << "cannot open " << Path << ": " << EC.message() << "\n"; + return false; + } + Inputs.push_back(MBOrErr.get()); + } + + // Create a symbol table. + SymbolTable Symtab; + + // Parse all input files and put all symbols to the symbol table. + // The symbol table will take care of name resolution. + for (MemoryBufferRef MB : Inputs) { + std::unique_ptr File = createFile(MB); + if (Config->Verbose) + llvm::outs() << "Reading " << File->getName() << "\n"; + if (auto EC = Symtab.addFile(std::move(File))) { + llvm::errs() << MB.getBufferIdentifier() << ": " << EC.message() << "\n"; + return false; + } + } + + // Make sure we have resolved all symbols. + if (Symtab.reportRemainingUndefines()) + return false; + + // Initialize a list of GC root. + Config->GCRoots.insert(Config->EntryName); + + // Do LTO by compiling bitcode input files to a native ELF file + // then link that file. + if (auto EC = Symtab.addCombinedLTOObject()) { + llvm::errs() << EC.message() << "\n"; + return false; + } + + // Write the result. + Writer Out(&Symtab); + if (auto EC = Out.write(Config->OutputFile)) { + llvm::errs() << EC.message() << "\n"; + return false; + } + return true; +} Index: ELF/DriverUtils.cpp =================================================================== --- /dev/null +++ ELF/DriverUtils.cpp @@ -0,0 +1,121 @@ +//===- DriverUtils.cpp ----------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file contains utility functions for the driver. Because there +// are so many small functions, we created this separate file to make +// Driver.cpp less cluttered. +// +//===----------------------------------------------------------------------===// + +#include "Config.h" +#include "Driver.h" +#include "Error.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Option/Arg.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Option/Option.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace llvm; +using llvm::cl::ExpandResponseFiles; +using llvm::cl::TokenizeWindowsCommandLine; +using llvm::sys::Process; + +using namespace lld; +using namespace lld::elfv2; + +// Create OptTable + +// Create prefix string literals used in Options.td +#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE; +#include "Options.inc" +#undef PREFIX + +// Create table mapping all options defined in Options.td +static const llvm::opt::OptTable::Info infoTable[] = { +#define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X6, X7, X8, X9, X10) \ + { \ + X1, X2, X9, X10, OPT_##ID, llvm::opt::Option::KIND##Class, X8, X7, \ + OPT_##GROUP, OPT_##ALIAS, X6 \ + } \ + , +#include "Options.inc" +#undef OPTION +}; + +class ELFOptTable : public llvm::opt::OptTable { +public: + ELFOptTable() : OptTable(infoTable, llvm::array_lengthof(infoTable)) {} +}; + +// Parses a given list of options. +ErrorOr +ArgParser::parse(std::vector Argv) { + // First, replace respnose files (@-style options). 
+ auto ArgvOrErr = replaceResponseFiles(Argv); + if (auto EC = ArgvOrErr.getError()) { + llvm::errs() << "error while reading response file: " << EC.message() + << "\n"; + return EC; + } + Argv = std::move(ArgvOrErr.get()); + + // Make InputArgList from string vectors. + ELFOptTable Table; + unsigned MissingIndex; + unsigned MissingCount; + + llvm::opt::InputArgList Args = + Table.ParseArgs(Argv, MissingIndex, MissingCount); + if (MissingCount) { + llvm::errs() << "missing arg value for \"" + << Args.getArgString(MissingIndex) << "\", expected " + << MissingCount + << (MissingCount == 1 ? " argument.\n" : " arguments.\n"); + return make_error_code(LLDError::InvalidOption); + } + for (auto *Arg : Args.filtered(OPT_UNKNOWN)) + llvm::errs() << "ignoring unknown argument: " << Arg->getSpelling() << "\n"; + return std::move(Args); +} + +ErrorOr +ArgParser::parse(llvm::ArrayRef Args) { + Args = Args.slice(1); + std::vector V(Args.begin(), Args.end()); + return parse(V); +} + +std::vector ArgParser::tokenize(StringRef S) { + SmallVector Tokens; + BumpPtrStringSaver Saver(AllocAux); + llvm::cl::TokenizeWindowsCommandLine(S, Saver, Tokens); + return std::vector(Tokens.begin(), Tokens.end()); +} + +// Creates a new command line by replacing options starting with '@' +// character. '@' is replaced by the file's contents. 
+ErrorOr> +ArgParser::replaceResponseFiles(std::vector Argv) { + SmallVector Tokens(Argv.data(), Argv.data() + Argv.size()); + BumpPtrStringSaver Saver(AllocAux); + ExpandResponseFiles(Saver, TokenizeWindowsCommandLine, Tokens); + return std::vector(Tokens.begin(), Tokens.end()); +} + +void lld::elfv2::printHelp(const char *Argv0) { + ELFOptTable Table; + Table.PrintHelp(llvm::outs(), Argv0, "LLVM Linker", false); +} Index: ELF/Error.h =================================================================== --- /dev/null +++ ELF/Error.h @@ -0,0 +1,54 @@ +//===- Error.h ------------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_ERROR_H +#define LLD_ELF_ERROR_H + +#include +#include +#include "llvm/Support/ErrorHandling.h" + +namespace lld { +namespace elfv2 { + +enum class LLDError { + InvalidOption = 1, + InvalidFile, + BrokenFile, + DuplicateSymbols, +}; + +class LLDErrorCategory : public std::error_category { +public: + const char *name() const LLVM_NOEXCEPT override { return "lld"; } + + std::string message(int EV) const override { + switch (static_cast(EV)) { + case LLDError::InvalidOption: + return "Invalid option"; + case LLDError::InvalidFile: + return "Invalid file"; + case LLDError::BrokenFile: + return "Broken file"; + case LLDError::DuplicateSymbols: + return "Duplicate symbols"; + } + llvm_unreachable("unknown error"); + } +}; + +inline std::error_code make_error_code(LLDError Err) { + static LLDErrorCategory C; + return std::error_code(static_cast(Err), C); +} + +} // namespace elfv2 +} // namespace lld + +#endif Index: ELF/InputFiles.h =================================================================== --- /dev/null +++ ELF/InputFiles.h @@ -0,0 +1,158 @@ +//===- InputFiles.h 
-------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_ELF_INPUT_FILES_H +#define LLD_ELF_INPUT_FILES_H + +#include "Chunks.h" +#include "Symbols.h" +#include "lld/Core/LLVM.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/LTO/LTOModule.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/ELF.h" +#include "llvm/Support/StringSaver.h" +#include +#include +#include + +namespace lld { +namespace elfv2 { + +using llvm::LTOModule; +using llvm::object::Archive; +using llvm::object::ELFFile; + +// The root class of input files. +class InputFile { +public: + enum Kind { ArchiveKind, ObjectKind, ImportKind, BitcodeKind }; + Kind kind() const { return FileKind; } + virtual ~InputFile() {} + + // Returns the filename. + StringRef getName() { return MB.getBufferIdentifier(); } + + // Returns symbols defined by this file. + virtual std::vector &getSymbols() = 0; + + // Reads a file (constructors don't do that). Returns an error if a + // file is broken. + virtual std::error_code parse() = 0; + + // Returns a short, human-friendly filename. If this is a member of + // an archive file, a returned value includes parent's filename. + // Used for logging or debugging. + std::string getShortName(); + + // Sets a parent filename if this file is created from an archive. + void setParentName(StringRef N) { ParentName = N; } + +protected: + explicit InputFile(Kind K, MemoryBufferRef M) : MB(M), FileKind(K) {} + MemoryBufferRef MB; + +private: + const Kind FileKind; + StringRef ParentName; +}; + +// .lib or .a file. 
+class ArchiveFile : public InputFile { +public: + explicit ArchiveFile(MemoryBufferRef M) : InputFile(ArchiveKind, M) {} + static bool classof(const InputFile *F) { return F->kind() == ArchiveKind; } + std::error_code parse() override; + + // Returns a memory buffer for a given symbol. An empty memory buffer + // is returned if we have already returned the same memory buffer. + // (So that we don't instantiate same members more than once.) + ErrorOr getMember(const Archive::Symbol *Sym); + + // NB: All symbols returned by ArchiveFiles are of Lazy type. + std::vector &getSymbols() override { return SymbolBodies; } + +private: + std::unique_ptr File; + std::string Filename; + std::vector SymbolBodies; + std::set Seen; + llvm::MallocAllocator Alloc; +}; + +// .obj or .o file. This may be a member of an archive file. +template class ObjectFile : public InputFile { + typedef llvm::object::Elf_Sym_Impl Elf_Sym; + +public: + explicit ObjectFile(MemoryBufferRef M) : InputFile(ObjectKind, M) {} + static bool classof(const InputFile *F) { return F->kind() == ObjectKind; } + std::error_code parse() override; + std::vector &getChunks() { return Chunks; } + std::vector &getSymbols() override { return SymbolBodies; } + + // Returns a SymbolBody object for the SymbolIndex'th symbol in the + // underlying object file. + SymbolBody *getSymbolBody(uint32_t SymbolIndex); + + // Returns the underlying ELF file. + ELFFile *getObj() { return ELFObj.get(); } + +private: + std::error_code initializeChunks(); + std::error_code initializeSymbols(); + + SymbolBody *createSymbolBody(const Elf_Sym *Sym); + + std::unique_ptr> ELFObj; + llvm::BumpPtrAllocator Alloc; + + // List of all chunks defined by this file. This includes both section + // chunks and non-section chunks for common symbols. + std::vector Chunks; + + // This vector contains the same chunks as Chunks, but they are + // indexed such that you can get a SectionChunk by section index.
+ // Nonexistent section indices are filled with null pointers. + // (Because section number is 1-based, the first slot is always a + // null pointer.) + std::vector SparseChunks; + + // List of all symbols referenced or defined by this file. + std::vector SymbolBodies; + + // This vector contains the same symbols as SymbolBodies, but they + // are indexed such that you can get a SymbolBody by symbol + // index. Indices for which no SymbolBody was created (including + // index 0) are filled with null pointers. + std::vector SparseSymbolBodies; +}; + +// Used for LTO. +class BitcodeFile : public InputFile { +public: + explicit BitcodeFile(MemoryBufferRef M) : InputFile(BitcodeKind, M) {} + static bool classof(const InputFile *F) { return F->kind() == BitcodeKind; } + std::vector &getSymbols() override { return SymbolBodies; } + + LTOModule *getModule() const { return M.get(); } + LTOModule *releaseModule() { return M.release(); } + +private: + std::error_code parse() override; + + std::vector SymbolBodies; + llvm::BumpPtrAllocator Alloc; + std::unique_ptr M; +}; + +} // namespace elfv2 +} // namespace lld + +#endif Index: ELF/InputFiles.cpp =================================================================== --- /dev/null +++ ELF/InputFiles.cpp @@ -0,0 +1,216 @@ +//===- InputFiles.cpp -----------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#include "Chunks.h" +#include "Error.h" +#include "InputFiles.h" +#include "Writer.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/LTO/LTOModule.h" +#include "llvm/Object/ELF.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm::ELF; +using namespace llvm::object; +using namespace llvm::support::endian; +using llvm::RoundUpToAlignment; +using llvm::sys::fs::identify_magic; +using llvm::sys::fs::file_magic; + +using namespace lld; +using namespace lld::elfv2; + +// Returns the last element of a path, which is supposed to be a filename. +static StringRef getBasename(StringRef Path) { + size_t Pos = Path.rfind('\\'); + if (Pos == StringRef::npos) + return Path; + return Path.substr(Pos + 1); +} + +// Returns a string in the format of "foo.obj" or "foo.obj(bar.lib)". +std::string InputFile::getShortName() { + if (ParentName == "") + return getName().lower(); + std::string Res = + (getBasename(ParentName) + "(" + getBasename(getName()) + ")").str(); + return StringRef(Res).lower(); +} + +std::error_code ArchiveFile::parse() { + // Parse a MemoryBufferRef as an archive file. + auto ArchiveOrErr = Archive::create(MB); + if (auto EC = ArchiveOrErr.getError()) + return EC; + File = std::move(ArchiveOrErr.get()); + + // Allocate a buffer for Lazy objects. + size_t BufSize = File->getNumberOfSymbols() * sizeof(Lazy); + Lazy *Buf = (Lazy *)Alloc.Allocate(BufSize, llvm::alignOf()); + + // Read the symbol table to construct Lazy objects. + uint32_t I = 0; + for (const Archive::Symbol &Sym : File->symbols()) { + SymbolBodies.push_back(new (&Buf[I++]) Lazy(this, Sym)); + } + return std::error_code(); +} + +// Returns a buffer pointing to a member file containing a given symbol. 
+ErrorOr ArchiveFile::getMember(const Archive::Symbol *Sym) { + auto ItOrErr = Sym->getMember(); + if (auto EC = ItOrErr.getError()) + return EC; + Archive::child_iterator It = ItOrErr.get(); + + // Return an empty buffer if we have already returned the same buffer. + const char *StartAddr = It->getBuffer().data(); + auto Pair = Seen.insert(StartAddr); + if (!Pair.second) + return MemoryBufferRef(); + return It->getMemoryBufferRef(); +} + +template std::error_code elfv2::ObjectFile::parse() { + // Parse a memory buffer as a ELF file. + std::error_code EC; + ELFObj = llvm::make_unique>(MB.getBuffer(), EC); + + if (EC) { + llvm::errs() << getName() << " is not an ELF file.\n"; + return EC; + } + + // Read section and symbol tables. + if ((EC = initializeChunks())) + return EC; + return initializeSymbols(); +} + +template +SymbolBody *elfv2::ObjectFile::getSymbolBody(uint32_t SymbolIndex) { + return SparseSymbolBodies[SymbolIndex]->getReplacement(); +} + +static bool isIgnoredSectionType(unsigned Type) { + switch (Type) { + case SHT_NULL: + case SHT_SYMTAB: + case SHT_STRTAB: + case SHT_RELA: + case SHT_HASH: + case SHT_DYNAMIC: + case SHT_NOTE: + case SHT_REL: + case SHT_DYNSYM: + case SHT_SYMTAB_SHNDX: + return true; + } + return false; +} + +template +std::error_code elfv2::ObjectFile::initializeChunks() { + auto Size = ELFObj->getNumSections(); + Chunks.reserve(Size); + SparseChunks.resize(Size); + int I = 0; + for (auto &&Sec : ELFObj->sections()) { + if (isIgnoredSectionType(Sec.sh_type) || Sec.sh_addralign == 0) { + ++I; + continue; + } + auto *C = new (Alloc) SectionChunk(this, &Sec, I); + Chunks.push_back(C); + SparseChunks[I] = C; + ++I; + } + return std::error_code(); +} + +template +std::error_code elfv2::ObjectFile::initializeSymbols() { + auto Syms = ELFObj->symbols(); + Syms = typename ELFFile::Elf_Sym_Range(Syms.begin() + 1, Syms.end()); + auto NumSymbols = std::distance(Syms.begin(), Syms.end()); + SymbolBodies.reserve(NumSymbols + 1); + 
SparseSymbolBodies.resize(NumSymbols + 1); + int I = 1; + for (auto &&Sym : Syms) { + SymbolBody *Body = createSymbolBody(&Sym); + if (Body) { + SymbolBodies.push_back(Body); + SparseSymbolBodies[I] = Body; + } + ++I; + } + + return std::error_code(); +} + +template +SymbolBody *elfv2::ObjectFile::createSymbolBody(const Elf_Sym *Sym) { + StringRef Name; + if (Sym->isUndefined()) { + Name = *ELFObj->getStaticSymbolName(Sym); + return new (Alloc) Undefined(Name); + } + if (Sym->isCommon()) { + Chunk *C = new (Alloc) CommonChunk(Sym); + Chunks.push_back(C); + return new (Alloc) DefinedRegular(ELFObj.get(), Sym, C); + } + if (Sym->isAbsolute()) { + Name = *ELFObj->getStaticSymbolName(Sym); + return new (Alloc) DefinedAbsolute(Name, Sym->getValue()); + } + if (Chunk *C = SparseChunks[Sym->st_shndx]) + return new (Alloc) DefinedRegular(ELFObj.get(), Sym, C); + return nullptr; +} + +std::error_code BitcodeFile::parse() { + std::string Err; + M.reset(LTOModule::createFromBuffer(MB.getBufferStart(), MB.getBufferSize(), + llvm::TargetOptions(), Err)); + if (!Err.empty()) { + llvm::errs() << Err << '\n'; + return make_error_code(LLDError::BrokenFile); + } + + for (unsigned I = 0, E = M->getSymbolCount(); I != E; ++I) { + lto_symbol_attributes Attrs = M->getSymbolAttributes(I); + if ((Attrs & LTO_SYMBOL_SCOPE_MASK) == LTO_SYMBOL_SCOPE_INTERNAL) + continue; + + StringRef SymName = M->getSymbolName(I); + int SymbolDef = Attrs & LTO_SYMBOL_DEFINITION_MASK; + if (SymbolDef == LTO_SYMBOL_DEFINITION_UNDEFINED) { + SymbolBodies.push_back(new (Alloc) Undefined(SymName)); + } else { + bool Replaceable = (SymbolDef == LTO_SYMBOL_DEFINITION_TENTATIVE || + (Attrs & LTO_SYMBOL_COMDAT)); + SymbolBodies.push_back(new (Alloc) DefinedBitcode(SymName, Replaceable)); + } + } + + return std::error_code(); +} + +namespace lld { +namespace elfv2 { +template class elfv2::ObjectFile; +template class elfv2::ObjectFile; +template class elfv2::ObjectFile; +template class elfv2::ObjectFile; +} +} Index: 
ELF/Options.td
===================================================================
--- /dev/null
+++ ELF/Options.td
@@ -0,0 +1,54 @@
+include "llvm/Option/OptParser.td"
+
+//===----------------------------------------------------------------------===//
+/// Utility Functions
+//===----------------------------------------------------------------------===//
+// Single and multiple dash options combined
+// Each use of smDash defines four option records:
+//   - a Separate option with prefix "-" and name opt1, carrying the help text,
+//   - a Joined form with prefix "-" and name opt1 followed by "=",
+//   - a Separate form with prefix "--" and name opt2,
+//   - a Joined form with prefix "--" and name opt2 followed by "=".
+// The last three are declared as aliases that refer to opt1.
+// NOTE(review): opt1 and opt2 are presumably parameters of the multiclass;
+// its parameter list is not visible in this text -- confirm.
+multiclass smDash {
+  // Option
+  def "" : Separate<["-"], opt1>, HelpText;
+  def opt1_eq : Joined<["-"], opt1#"=">,
+      Alias(opt1)>;
+  // Compatibility aliases
+  def opt2_dashdash : Separate<["--"], opt2>,
+      Alias(opt1)>;
+  def opt2_dashdash_eq : Joined<["--"], opt2#"=">,
+      Alias(opt1)>;
+}
+
+// Support -