Index: CMakeLists.txt
===================================================================
--- CMakeLists.txt
+++ CMakeLists.txt
@@ -96,3 +96,4 @@
 endif()
 
 add_subdirectory(docs)
+add_subdirectory(COFF)
Index: COFF/CMakeLists.txt
===================================================================
--- /dev/null
+++ COFF/CMakeLists.txt
@@ -0,0 +1,14 @@
+set(LLVM_TARGET_DEFINITIONS Options.td)
+tablegen(LLVM Options.inc -gen-opt-parser-defs)
+add_public_tablegen_target(COFFOptionsTableGen)
+
+add_llvm_library(lldCOFF
+  Chunks.cpp
+  Driver.cpp
+  InputFiles.cpp
+  SymbolTable.cpp
+  Symbols.cpp
+  Writer.cpp
+  )
+
+add_dependencies(lldCOFF COFFOptionsTableGen)
Index: COFF/Chunks.h
===================================================================
--- /dev/null
+++ COFF/Chunks.h
@@ -0,0 +1,254 @@
+//===- Chunks.h -----------------------------------------------------------===//
+//
+//                             The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_COFF_CHUNKS_H
+#define LLD_COFF_CHUNKS_H
+
+#include "lld/Core/LLVM.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Object/COFF.h"
+#include <cstdint>
+#include <vector>
+
+using llvm::COFF::ImportDirectoryTableEntry;
+using llvm::object::COFFSymbolRef;
+using llvm::object::SectionRef;
+using llvm::object::coff_relocation;
+using llvm::object::coff_section;
+using llvm::sys::fs::file_magic;
+
+namespace lld {
+namespace coff {
+
+class Defined;
+class DefinedImportData;
+class ObjectFile;
+class OutputSection;
+
+// A Chunk represents a chunk of data that will occupy space in the
+// output (if the resolver chose that). It may or may not be backed by
+// a section of an input file. It could be linker-created data, or
+// it may not have any actual data at all (if common or BSS).
+class Chunk {
+public:
+  virtual ~Chunk() {}
+
+  // Returns the pointer to data. It is illegal to call this function if
+  // this is a common or BSS chunk.
+  virtual const uint8_t *getData() const { llvm_unreachable("internal error"); }
+
+  // Returns the size of this chunk (even if this is a common or BSS.)
+  virtual size_t getSize() const = 0;
+
+  // The writer sets and uses the addresses.
+  uint64_t getRVA() const { return RVA; }
+  uint64_t getFileOff() const { return FileOff; }
+  uint32_t getAlign() const { return Align; }
+  void setRVA(uint64_t V) { RVA = V; }
+  void setFileOff(uint64_t V) { FileOff = V; }
+
+  // Applies relocations, assuming Buf points to the beginning of an
+  // mmap'ed output file. Because this function uses file offsets and
+  // RVA values of other chunks, you need to set them properly before
+  // calling this function. The default implementation does nothing.
+  virtual void applyRelocations(uint8_t *Buf) {}
+
+  // Returns true if getData() returns a valid pointer to data.
+  // BSS chunks return false. If false is returned, the space occupied
+  // by this chunk is filled with zeros.
+  virtual bool hasData() const { return true; }
+
+  // Returns readable/writable/executable bits.
+  virtual uint32_t getPermissions() const { return 0; }
+
+  // Returns the section name if this is a section chunk.
+  // It is illegal to call this function on non-section chunks.
+  virtual StringRef getSectionName() const {
+    llvm_unreachable("internal error");
+  }
+
+  // Called if the garbage collector decides to not include this chunk
+  // in a final output. It's supposed to print out a log message. It
+  // is illegal to call this function on non-section chunks because
+  // only section chunks are subject to garbage collection.
+  virtual void printDiscardedMessage() { llvm_unreachable("internal error"); }
+
+  // Returns true if this is a COMDAT section. Usually, it is an error
+  // if there is more than one defined symbol having the same name,
+  // but symbols at the beginning of COMDAT sections are allowed to be
+  // duplicated.
+  virtual bool isCOMDAT() const { return false; }
+
+  // Used by the garbage collector. By default a chunk is not a root,
+  // is always live, and marking it live is a no-op.
+  virtual bool isRoot() { return false; }
+  virtual bool isLive() { return true; }
+  virtual void markLive() {}
+
+  // An output section has pointers to chunks in the section, and each
+  // chunk has a back pointer to an output section.
+  void setOutputSection(OutputSection *O) { Out = O; }
+  OutputSection *getOutputSection() { return Out; }
+
+protected:
+  // The RVA of this chunk in the output. The writer sets a value.
+  uint64_t RVA = 0;
+
+  // The offset from beginning of the output file. The writer sets a
+  // value.
+  uint64_t FileOff = 0;
+
+  // The alignment of this chunk. The writer uses the value.
+  uint32_t Align = 1;
+
+  // The output section for this chunk.
+  OutputSection *Out = nullptr;
+};
+
+// A chunk representing a section of an input file.
+class SectionChunk : public Chunk {
+public:
+  SectionChunk(ObjectFile *File, const coff_section *Header,
+               uint32_t SectionIndex);
+  const uint8_t *getData() const override;
+  size_t getSize() const override { return Header->SizeOfRawData; }
+  void applyRelocations(uint8_t *Buf) override;
+  bool hasData() const override;
+  uint32_t getPermissions() const override;
+  StringRef getSectionName() const override { return SectionName; }
+  void printDiscardedMessage() override;
+  bool isCOMDAT() const override;
+
+  bool isRoot() override;
+  void markLive() override;
+  bool isLive() override { return Live; }
+
+  // Adds COMDAT associative sections to this COMDAT section. A chunk
+  // and its children are treated as a group by the garbage collector.
+  void addAssociative(SectionChunk *Child);
+
+private:
+  SectionRef getSectionRef();
+  void applyReloc(uint8_t *Buf, const coff_relocation *Rel);
+
+  // A file this chunk was created from.
+  ObjectFile *File;
+
+  // The section header in the input file and its 1-based section index.
+  const coff_section *Header;
+  uint32_t SectionIndex;
+  StringRef SectionName;
+
+  // Set by markLive(); section chunks start out dead.
+  bool Live = false;
+
+  // Associative children registered via addAssociative(), and whether
+  // this chunk itself has been registered as some chunk's child.
+  std::vector<Chunk *> AssocChildren;
+  bool IsAssocChild = false;
+};
+
+// A chunk for common symbols. Common chunks don't have actual data.
+class CommonChunk : public Chunk {
+public:
+  CommonChunk(const COFFSymbolRef S) : Sym(S) {}
+
+  // The size of a common chunk is taken from the symbol's value field.
+  size_t getSize() const override { return Sym.getValue(); }
+  bool hasData() const override { return false; }
+  uint32_t getPermissions() const override;
+  StringRef getSectionName() const override { return ".bss"; }
+
+private:
+  const COFFSymbolRef Sym;
+};
+
+// A chunk for linker-created strings.
+class StringChunk : public Chunk {
+public:
+  explicit StringChunk(StringRef S);
+  const uint8_t *getData() const override { return Data.data(); }
+  size_t getSize() const override { return Data.size(); }
+
+private:
+  // The string contents followed by a terminating NUL byte.
+  std::vector<uint8_t> Data;
+};
+
+// All chunks below are for the DLL import descriptor table and
+// Windows-specific. You may need to read the Microsoft PE/COFF spec
+// to understand details about the data structures.
+
+static const uint8_t ImportFuncData[] = {
+    0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // JMP *0x0
+};
+
+// A chunk for DLL import jump table entry. In a final output, its
+// contents will be a JMP instruction to some __imp_ symbol.
+class ImportFuncChunk : public Chunk {
+public:
+  explicit ImportFuncChunk(Defined *S) : ImpSymbol(S) {}
+  const uint8_t *getData() const override { return ImportFuncData; }
+  size_t getSize() const override { return sizeof(ImportFuncData); }
+
+  // Fills in the 32-bit operand of the JMP instruction.
+  void applyRelocations(uint8_t *Buf) override;
+
+private:
+  Defined *ImpSymbol;
+};
+
+// A chunk for the import descriptor table.
+class HintNameChunk : public Chunk {
+public:
+  explicit HintNameChunk(StringRef Name);
+  const uint8_t *getData() const override { return Data.data(); }
+  size_t getSize() const override { return Data.size(); }
+
+private:
+  // Two leading bytes (left zero) followed by the name and zero padding.
+  std::vector<uint8_t> Data;
+};
+
+// A chunk for the import descriptor table.
+class LookupChunk : public Chunk {
+public:
+  explicit LookupChunk(HintNameChunk *H) : HintName(H) {}
+  bool hasData() const override { return false; }
+  size_t getSize() const override { return sizeof(uint64_t); }
+
+  // Writes the RVA of the associated hint/name entry into this slot.
+  void applyRelocations(uint8_t *Buf) override;
+  HintNameChunk *HintName;
+};
+
+// A chunk for the import descriptor table.
+class DirectoryChunk : public Chunk {
+public:
+  explicit DirectoryChunk(StringChunk *N) : DLLName(N) {}
+  bool hasData() const override { return false; }
+  size_t getSize() const override { return sizeof(ImportDirectoryTableEntry); }
+
+  // Fills in the RVAs of the DLL name, lookup table and address table.
+  // LookupTab and AddressTab must have been set before this is called.
+  void applyRelocations(uint8_t *Buf) override;
+
+  StringChunk *DLLName;
+  LookupChunk *LookupTab = nullptr;
+  LookupChunk *AddressTab = nullptr;
+};
+
+// A chunk for the import descriptor table.
+class NullChunk : public Chunk {
+public:
+  explicit NullChunk(size_t N) : Size(N) {}
+  bool hasData() const override { return false; }
+  size_t getSize() const override { return Size; }
+
+private:
+  size_t Size;
+};
+
+// ImportTable creates a set of import table chunks for a given
+// set of DLL-imported symbols.
+class ImportTable {
+public:
+  ImportTable(StringRef DLLName, std::vector<DefinedImportData *> &Symbols);
+  StringChunk *DLLName;
+  DirectoryChunk *DirTab;
+  std::vector<LookupChunk *> LookupTables;
+  std::vector<LookupChunk *> AddressTables;
+  std::vector<HintNameChunk *> HintNameTables;
+};
+
+} // namespace coff
+} // namespace lld
+
+#endif
Index: COFF/Chunks.cpp
===================================================================
--- /dev/null
+++ COFF/Chunks.cpp
@@ -0,0 +1,205 @@
+//===- Chunks.cpp ---------------------------------------------------------===//
+//
+//                             The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Chunks.h"
+#include "InputFiles.h"
+#include "Writer.h"
+#include "lld/Core/Error.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm::object;
+using namespace llvm::support::endian;
+using llvm::COFF::ImportHeader;
+using llvm::RoundUpToAlignment;
+
+namespace lld {
+namespace coff {
+
+SectionChunk::SectionChunk(ObjectFile *F, const coff_section *H, uint32_t SI)
+    : File(F), Header(H), SectionIndex(SI) {
+  File->getCOFFObj()->getSectionName(Header, SectionName);
+  // Bits [20:23] of Characteristics hold log2(alignment) + 1. Zero means
+  // that no alignment was specified; shifting by (0 - 1) would be
+  // undefined behavior, so fall back to the 16-byte default in that case.
+  unsigned AlignBits = (Header->Characteristics & 0xF00000) >> 20;
+  Align = AlignBits ? uint32_t(1) << (AlignBits - 1) : 16;
+}
+
+// Returns a pointer to the section contents in the input file.
+const uint8_t *SectionChunk::getData() const {
+  assert(hasData());
+  ArrayRef<uint8_t> Data;
+  File->getCOFFObj()->getSectionContents(Header, Data);
+  return Data.data();
+}
+
+// Returns true if this chunk should be considered as a GC root.
+bool SectionChunk::isRoot() {
+  if (isCOMDAT())
+    return false;
+
+  // Associative sections are live if their parent COMDATs are live,
+  // and vice versa, so they are not considered live by themselves.
+  if (IsAssocChild)
+    return false;
+
+  // Only code is subject to dead-stripping.
+  return !(Header->Characteristics & llvm::COFF::IMAGE_SCN_CNT_CODE);
+}
+
+void SectionChunk::markLive() {
+  if (Live)
+    return;
+  Live = true;
+
+  // Mark all symbols listed in the relocation table for this section.
+  for (const auto &I : getSectionRef().relocations()) {
+    const coff_relocation *Rel = File->getCOFFObj()->getCOFFRelocation(I);
+    SymbolBody *B = File->getSymbolBody(Rel->SymbolTableIndex);
+    if (auto *Def = dyn_cast<Defined>(B))
+      Def->markLive();
+  }
+
+  // Mark associative sections if any.
+  for (Chunk *C : AssocChildren)
+    C->markLive();
+}
+
+void SectionChunk::addAssociative(SectionChunk *Child) {
+  Child->IsAssocChild = true;
+  AssocChildren.push_back(Child);
+}
+
+void SectionChunk::applyRelocations(uint8_t *Buf) {
+  for (const auto &I : getSectionRef().relocations()) {
+    const coff_relocation *Rel = File->getCOFFObj()->getCOFFRelocation(I);
+    applyReloc(Buf, Rel);
+  }
+}
+
+// Add V to the little-endian value stored at P.
+static void add16(uint8_t *P, int32_t V) { write16le(P, read16le(P) + V); }
+static void add32(uint8_t *P, int32_t V) { write32le(P, read32le(P) + V); }
+static void add64(uint8_t *P, int64_t V) { write64le(P, read64le(P) + V); }
+
+// Implements x64 PE/COFF relocations.
+// S is the RVA of the target symbol and P is the RVA of the location
+// being relocated.
+void SectionChunk::applyReloc(uint8_t *Buf, const coff_relocation *Rel) {
+  using namespace llvm::COFF;
+  uint8_t *Off = Buf + FileOff + Rel->VirtualAddress;
+  SymbolBody *Body = File->getSymbolBody(Rel->SymbolTableIndex);
+  uint64_t S = cast<Defined>(Body)->getRVA();
+  uint64_t P = RVA + Rel->VirtualAddress;
+  switch (Rel->Type) {
+  case IMAGE_REL_AMD64_ADDR32:   add32(Off, S + Config->ImageBase); break;
+  case IMAGE_REL_AMD64_ADDR64:   add64(Off, S + Config->ImageBase); break;
+  case IMAGE_REL_AMD64_ADDR32NB: add32(Off, S); break;
+  case IMAGE_REL_AMD64_REL32:    add32(Off, S - P - 4); break;
+  case IMAGE_REL_AMD64_REL32_1:  add32(Off, S - P - 5); break;
+  case IMAGE_REL_AMD64_REL32_2:  add32(Off, S - P - 6); break;
+  case IMAGE_REL_AMD64_REL32_3:  add32(Off, S - P - 7); break;
+  case IMAGE_REL_AMD64_REL32_4:  add32(Off, S - P - 8); break;
+  case IMAGE_REL_AMD64_REL32_5:  add32(Off, S - P - 9); break;
+  // NOTE(review): the two cases below use the output section of the chunk
+  // being relocated, not that of the target symbol -- confirm that this is
+  // intended.
+  case IMAGE_REL_AMD64_SECTION:  add16(Off, Out->getSectionIndex()); break;
+  case IMAGE_REL_AMD64_SECREL:   add32(Off, S - Out->getRVA()); break;
+  default:
+    llvm::report_fatal_error("Unsupported relocation type");
+  }
+}
+
+bool SectionChunk::hasData() const {
+  return !(Header->Characteristics &
+           llvm::COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA);
+}
+
+uint32_t SectionChunk::getPermissions() const {
+  return Header->Characteristics & PermMask;
+}
+
+bool SectionChunk::isCOMDAT() const {
+  return Header->Characteristics & llvm::COFF::IMAGE_SCN_LNK_COMDAT;
+}
+
+// Prints "Discarded <symbol>" for all external function symbols.
+void SectionChunk::printDiscardedMessage() {
+  COFFObjectFile *Obj = File->getCOFFObj();
+  uint32_t E = Obj->getNumberOfSymbols();
+  for (uint32_t I = 0; I < E; ++I) {
+    auto SrefOrErr = Obj->getSymbol(I);
+    if (SrefOrErr.getError())
+      return;
+    COFFSymbolRef Sym = SrefOrErr.get();
+
+    // Step over the auxiliary records of this symbol no matter whether
+    // it is reported or not; otherwise they would be misread as symbols
+    // on the following iterations.
+    I += Sym.getNumberOfAuxSymbols();
+
+    if (Sym.getSectionNumber() != SectionIndex)
+      continue;
+    if (!Sym.isFunctionDefinition())
+      continue;
+    StringRef SymbolName;
+    Obj->getSymbolName(Sym, SymbolName);
+    llvm::dbgs() << "Discarded " << SymbolName << " from "
+                 << File->getShortName() << "\n";
+  }
+}
+
+// Builds a SectionRef for this section from its header pointer.
+SectionRef SectionChunk::getSectionRef() {
+  DataRefImpl Ref;
+  Ref.p = uintptr_t(Header);
+  return SectionRef(Ref, File->getCOFFObj());
+}
+
+uint32_t CommonChunk::getPermissions() const {
+  using namespace llvm::COFF;
+  return IMAGE_SCN_CNT_UNINITIALIZED_DATA | IMAGE_SCN_MEM_READ |
+         IMAGE_SCN_MEM_WRITE;
+}
+
+// Copies S and appends a terminating NUL byte.
+StringChunk::StringChunk(StringRef S) : Data(S.size() + 1) {
+  memcpy(Data.data(), S.data(), S.size());
+  Data[S.size()] = 0;
+}
+
+void ImportFuncChunk::applyRelocations(uint8_t *Buf) {
+  // The operand is relative to the end of the JMP instruction.
+  uint32_t Operand = ImpSymbol->getRVA() - RVA - getSize();
+  // The first two bytes are a JMP instruction. Fill its operand.
+  write32le(Buf + FileOff + 2, Operand);
+}
+
+// The first two bytes are left zero; the name follows them, and the
+// remaining bytes of the zero-initialized buffer terminate and pad it.
+HintNameChunk::HintNameChunk(StringRef Name)
+    : Data(RoundUpToAlignment(Name.size() + 4, 2)) {
+  memcpy(&Data[2], Name.data(), Name.size());
+}
+
+void LookupChunk::applyRelocations(uint8_t *Buf) {
+  write32le(Buf + FileOff, HintName->getRVA());
+}
+
+void DirectoryChunk::applyRelocations(uint8_t *Buf) {
+  auto *E = (coff_import_directory_table_entry *)(Buf + FileOff);
+  E->ImportLookupTableRVA = LookupTab->getRVA();
+  E->NameRVA = DLLName->getRVA();
+  E->ImportAddressTableRVA = AddressTab->getRVA();
+}
+
+// Creates the chunks that make up the import table for one DLL.
+// Symbols must not be empty: the directory entry points at the first
+// lookup and address table slots.
+ImportTable::ImportTable(StringRef N,
+                         std::vector<DefinedImportData *> &Symbols) {
+  assert(!Symbols.empty() && "an import table needs at least one symbol");
+  DLLName = new StringChunk(N);
+  DirTab = new DirectoryChunk(DLLName);
+  for (DefinedImportData *S : Symbols)
+    HintNameTables.push_back(new HintNameChunk(S->getExportName()));
+
+  for (HintNameChunk *H : HintNameTables) {
+    LookupTables.push_back(new LookupChunk(H));
+    AddressTables.push_back(new LookupChunk(H));
+  }
+
+  // Each imported symbol is located at its import address table slot.
+  for (size_t I = 0, E = Symbols.size(); I < E; ++I)
+    Symbols[I]->setLocation(AddressTables[I]);
+
+  DirTab->LookupTab = LookupTables[0];
+  DirTab->AddressTab = AddressTables[0];
+}
+
+} // namespace coff
+} // namespace lld
Index: COFF/Config.h
===================================================================
--- /dev/null
+++ COFF/Config.h
@@ -0,0 +1,40 @@
+//===- Config.h -----------------------------------------------------------===//
+//
+//                             The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_COFF_CONFIG_H
+#define LLD_COFF_CONFIG_H
+
+#include "llvm/ADT/StringRef.h"
+#include <cstdint>
+#include <set>
+#include <string>
+
+namespace lld {
+namespace coff {
+
+class Configuration {
+public:
+  bool Verbose = false;                     // Verbose output requested.
+  std::string EntryName = "mainCRTStartup"; // Name of the entry point symbol.
+  uint64_t ImageBase = 0x140000000;         // Added to absolute relocations.
+
+  bool insertFile(llvm::StringRef Path) { // True if Path was not seen before;
+    return VisitedFiles.insert(Path.lower()).second; // case is ignored.
+  }
+
+private:
+  std::set<std::string> VisitedFiles;
+};
+
+extern Configuration *Config;
+
+} // namespace coff
+} // namespace lld
+
+#endif
Index: COFF/Driver.h
===================================================================
--- /dev/null
+++ COFF/Driver.h
@@ -0,0 +1,31 @@
+//===- Driver.h -----------------------------------------------------------===//
+//
+//                             The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_COFF_DRIVER_H
+#define LLD_COFF_DRIVER_H
+
+#include "Memory.h"
+#include "llvm/ADT/StringRef.h"
+#include <memory>
+#include <system_error>
+#include <vector>
+
+namespace lld {
+namespace coff {
+
+class InputFile;
+
+std::error_code parseDirectives(StringRef S,
+                                std::vector<std::unique_ptr<InputFile>> *Res,
+                                StringAllocator *Alloc);
+
+} // namespace coff
+} // namespace lld
+
+#endif
Index: COFF/Driver.cpp
===================================================================
--- /dev/null
+++ COFF/Driver.cpp
@@ -0,0 +1,244 @@
+//===- Driver.cpp ---------------------------------------------------------===//
+//
+//                             The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Config.h"
+#include "Driver.h"
+#include "InputFiles.h"
+#include "Memory.h"
+#include "SymbolTable.h"
+#include "Writer.h"
+#include "lld/Core/Error.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Option/Arg.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/raw_ostream.h"
+#include <memory>
+#include <tuple>
+
+using namespace llvm;
+
+// Create enum with OPT_xxx values for each option in Options.td
+enum {
+  OPT_INVALID = 0,
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
+               HELP, META)                                                    \
+  OPT_##ID,
+#include "Options.inc"
+#undef OPTION
+};
+
+// Create prefix string literals used in Options.td
+#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE;
+#include "Options.inc"
+#undef PREFIX
+
+// Create table mapping all options defined in Options.td
+static const llvm::opt::OptTable::Info infoTable[] = {
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
+               HELPTEXT, METAVAR)                                             \
+  {                                                                           \
+    PREFIX, NAME, HELPTEXT, METAVAR, OPT_##ID, llvm::opt::Option::KIND##Class, \
+        PARAM, FLAGS, OPT_##GROUP, OPT_##ALIAS, ALIASARGS                     \
+  }                                                                           \
+  ,
+#include "Options.inc"
+#undef OPTION
+};
+
+namespace {
+
+class COFFOptTable : public llvm::opt::OptTable {
+public:
+  COFFOptTable() : OptTable(infoTable, llvm::array_lengthof(infoTable), true) {}
+};
+
+// A StringSaver that keeps saved strings in a StringAllocator.
+class BumpPtrStringSaver : public llvm::cl::StringSaver {
+public:
+  BumpPtrStringSaver(lld::coff::StringAllocator *A) : Alloc(A) {}
+
+  const char *SaveString(const char *S) override {
+    return Alloc->save(S).data();
+  }
+
+private:
+  lld::coff::StringAllocator *Alloc;
+};
+}
+
+// Returns the output file name. An explicit "out" option wins. Otherwise
+// the name is derived from the first .obj input, or, if there is none
+// (e.g. only libraries were given), from the first input file, by
+// replacing the extension with ".exe".
+static std::string getOutputPath(llvm::opt::InputArgList *Args) {
+  if (auto *Arg = Args->getLastArg(OPT_out))
+    return Arg->getValue();
+
+  const char *Fallback = nullptr;
+  const char *Chosen = nullptr;
+  for (auto *Arg : Args->filtered(OPT_INPUT)) {
+    if (!Fallback)
+      Fallback = Arg->getValue();
+    if (StringRef(Arg->getValue()).endswith_lower(".obj")) {
+      Chosen = Arg->getValue();
+      break;
+    }
+  }
+  if (!Chosen)
+    Chosen = Fallback;
+  // link() rejects an empty input list before calling this function.
+  if (!Chosen)
+    llvm_unreachable("internal error");
+
+  SmallString<128> Val = StringRef(Chosen);
+  llvm::sys::path::replace_extension(Val, ".exe");
+  return Val.str();
+}
+
+// Split the given string with the path separator.
+static std::vector<StringRef> splitPathList(StringRef str) {
+  std::vector<StringRef> ret;
+  while (!str.empty()) {
+    StringRef path;
+    std::tie(path, str) = str.split(';');
+    ret.push_back(path);
+  }
+  return ret;
+}
+
+// Looks for Name in each directory listed in the LIB environment
+// variable and returns the first path that exists. Returns Default if
+// LIB is not set or no directory contains Name.
+static std::string searchLibEnv(StringRef Name, StringRef Default) {
+  llvm::Optional<std::string> Env = llvm::sys::Process::GetEnv("LIB");
+  if (!Env.hasValue())
+    return Default.str();
+  for (StringRef Dir : splitPathList(*Env)) {
+    SmallString<128> Path = Dir;
+    llvm::sys::path::append(Path, Name);
+    if (llvm::sys::fs::exists(Path.str()))
+      return Path.str().str();
+  }
+  return Default.str();
+}
+
+namespace lld {
+namespace coff {
+
+Configuration *Config;
+
+// Parses the given command line. Argv[0] is skipped. Unknown arguments
+// are reported on stderr and otherwise ignored.
+ErrorOr<std::unique_ptr<llvm::opt::InputArgList>>
+parseArgs(int Argc, const char *Argv[]) {
+  COFFOptTable Table;
+  unsigned MissingIndex;
+  unsigned MissingCount;
+  std::unique_ptr<llvm::opt::InputArgList> Args(
+      Table.ParseArgs(&Argv[1], &Argv[Argc], MissingIndex, MissingCount));
+  if (MissingCount) {
+    std::string S;
+    llvm::raw_string_ostream OS(S);
+    OS << llvm::format("missing arg value for \"%s\", expected %d argument%s.",
+                       Args->getArgString(MissingIndex), MissingCount,
+                       (MissingCount == 1 ? "" : "s"));
+    OS.flush();
+    return make_dynamic_error_code(StringRef(S));
+  }
+  for (auto *Arg : Args->filtered(OPT_UNKNOWN))
+    llvm::errs() << "ignoring unknown argument: " << Arg->getSpelling() << "\n";
+  return std::move(Args);
+}
+
+// Finds a library. The name is used as is if such a file exists;
+// otherwise ".lib" is appended if missing and the LIB directories are
+// searched.
+std::string findLib(StringRef Filename) {
+  if (llvm::sys::fs::exists(Filename))
+    return Filename.str();
+  std::string Name = Filename.str();
+  if (!Filename.endswith_lower(".lib"))
+    Name += ".lib";
+  return searchLibEnv(Name, Filename);
+}
+
+// Finds an input file, searching the LIB directories if the file does
+// not exist under the given name.
+std::string findFile(StringRef Filename) {
+  if (llvm::sys::fs::exists(Filename))
+    return Filename.str();
+  return searchLibEnv(Filename, Filename);
+}
+
+// Creates an ArchiveFile for paths ending with ".lib" (case is ignored)
+// and an ObjectFile for everything else.
+std::unique_ptr<InputFile> createFile(StringRef Path) {
+  if (StringRef(Path).endswith_lower(".lib"))
+    return llvm::make_unique<ArchiveFile>(Path);
+  return llvm::make_unique<ObjectFile>(Path);
+}
+
+// Parses .drectve section contents and returns a list of files
+// specified by /defaultlib.
+std::error_code parseDirectives(StringRef S,
+                                std::vector<std::unique_ptr<InputFile>> *Res,
+                                StringAllocator *Alloc) {
+  SmallVector<const char *, 16> Tokens;
+  Tokens.push_back("link"); // argv[0] value. Will be ignored.
+  BumpPtrStringSaver Saver(Alloc);
+  llvm::cl::TokenizeWindowsCommandLine(S, Saver, Tokens);
+  Tokens.push_back(nullptr);
+  int Argc = Tokens.size() - 1;
+  const char **Argv = &Tokens[0];
+
+  auto ArgsOrErr = parseArgs(Argc, Argv);
+  if (auto EC = ArgsOrErr.getError())
+    return EC;
+  std::unique_ptr<llvm::opt::InputArgList> Args = std::move(ArgsOrErr.get());
+
+  for (auto *Arg : Args->filtered(OPT_defaultlib)) {
+    std::string Path = findLib(Arg->getValue());
+    // Skip libraries that have already been added.
+    if (!Config->insertFile(Path))
+      continue;
+    Res->push_back(llvm::make_unique<ArchiveFile>(Path));
+  }
+  return std::error_code();
+}
+
+// The linker entry point. Returns true on success.
+bool link(int Argc, const char *Argv[]) {
+  // Parse command line options.
+  Config = new Configuration();
+  auto ArgsOrErr = parseArgs(Argc, Argv);
+  if (auto EC = ArgsOrErr.getError()) {
+    llvm::errs() << EC.message() << "\n";
+    return false;
+  }
+  std::unique_ptr<llvm::opt::InputArgList> Args = std::move(ArgsOrErr.get());
+
+  if (Args->filtered_begin(OPT_INPUT) == Args->filtered_end()) {
+    llvm::errs() << "no input files.\n";
+    return false;
+  }
+  if (Args->hasArg(OPT_verbose))
+    Config->Verbose = true;
+  if (auto *Arg = Args->getLastArg(OPT_entry))
+    Config->EntryName = Arg->getValue();
+
+  // Parse all input files and put all symbols to the symbol table.
+  // The symbol table will take care of name resolution.
+  SymbolTable Symtab;
+  for (auto *Arg : Args->filtered(OPT_INPUT)) {
+    std::string Path = findFile(Arg->getValue());
+    if (!Config->insertFile(Path))
+      continue;
+    if (auto EC = Symtab.addFile(createFile(Path))) {
+      llvm::errs() << Path << ": " << EC.message() << "\n";
+      return false;
+    }
+  }
+  if (Symtab.reportRemainingUndefines())
+    return false;
+
+  // Write the result.
+  Writer Out(&Symtab);
+  if (auto EC = Out.write(getOutputPath(Args.get()))) {
+    llvm::errs() << EC.message() << "\n";
+    return false;
+  }
+  return true;
+}
+}
+}
Index: COFF/InputFiles.h
===================================================================
--- /dev/null
+++ COFF/InputFiles.h
@@ -0,0 +1,170 @@
+//===- InputFiles.h -------------------------------------------------------===//
+//
+//                             The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_COFF_INPUT_FILES_H
+#define LLD_COFF_INPUT_FILES_H
+
+#include "Chunks.h"
+#include "Memory.h"
+#include "Symbols.h"
+#include "lld/Core/LLVM.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Object/Archive.h"
+#include "llvm/Object/COFF.h"
+#include <memory>
+#include <set>
+#include <vector>
+
+using llvm::object::Archive;
+using llvm::object::COFFObjectFile;
+
+namespace lld {
+namespace coff {
+
+// The root class of input files.
+class InputFile {
+public:
+  enum Kind { ArchiveKind, ObjectKind, ImportKind };
+  Kind kind() const { return FileKind; }
+  virtual ~InputFile() {}
+
+  // Returns the filename.
+  virtual StringRef getName() = 0;
+
+  // Returns symbols defined by this file.
+  virtual std::vector<SymbolBody *> &getSymbols() = 0;
+
+  // Reads a file (constructors don't do that). Returns an error if a
+  // file is broken.
+  virtual std::error_code parse() = 0;
+
+  // Returns a short, human-friendly filename. If this is a member of
+  // an archive file, a returned value includes parent's filename.
+  // Used for logging or debugging.
+  std::string getShortName();
+
+  // Sets a parent filename if this file is created from an archive.
+  void setParentName(StringRef N) { ParentName = N; }
+
+protected:
+  explicit InputFile(Kind K) : FileKind(K) {}
+
+private:
+  const Kind FileKind;
+  StringRef ParentName;
+};
+
+// .lib or .a file.
+class ArchiveFile : public InputFile {
+public:
+  explicit ArchiveFile(StringRef S) : InputFile(ArchiveKind), Name(S) {}
+  static bool classof(const InputFile *F) { return F->kind() == ArchiveKind; }
+  std::error_code parse() override;
+  StringRef getName() override { return Name; }
+
+  // Returns a memory buffer for a given symbol. An empty memory
+  // buffer is returned if we have already returned the same memory
+  // buffer. (So that we don't instantiate the same members more than
+  // once.)
+  ErrorOr<MemoryBufferRef> getMember(const Archive::Symbol *Sym);
+
+  // NB: All symbols returned by ArchiveFiles are of CanBeDefined type.
+  std::vector<SymbolBody *> &getSymbols() override { return SymbolBodies; }
+
+private:
+  std::unique_ptr<Archive> File;
+  std::string Name;
+  std::unique_ptr<MemoryBuffer> MB;
+  std::vector<SymbolBody *> SymbolBodies;
+
+  // Start addresses of the member buffers already handed out by
+  // getMember().
+  std::set<const char *> Seen;
+  llvm::MallocAllocator Alloc;
+};
+
+// .obj or .o file. This may be a member of an archive file.
+class ObjectFile : public InputFile {
+public:
+  explicit ObjectFile(StringRef S) : InputFile(ObjectKind), Name(S) {}
+  ObjectFile(StringRef S, MemoryBufferRef M)
+      : InputFile(ObjectKind), Name(S), MBRef(M) {}
+
+  static bool classof(const InputFile *F) { return F->kind() == ObjectKind; }
+  std::error_code parse() override;
+  StringRef getName() override { return Name; }
+  std::vector<Chunk *> &getChunks() { return Chunks; }
+  std::vector<SymbolBody *> &getSymbols() override { return SymbolBodies; }
+
+  // Returns a SymbolBody object for the SymbolIndex'th symbol in the
+  // underlying object file.
+  SymbolBody *getSymbolBody(uint32_t SymbolIndex);
+
+  // Returns .drectve section contents if exist.
+  StringRef getDirectives() { return Directives; }
+
+  // Returns the underlying COFF file.
+  COFFObjectFile *getCOFFObj() { return COFFObj.get(); }
+
+private:
+  std::error_code initializeChunks();
+  std::error_code initializeSymbols();
+
+  SymbolBody *createSymbolBody(StringRef Name, COFFSymbolRef Sym,
+                               const void *Aux, bool IsFirst);
+
+  std::string Name;
+  std::unique_ptr<COFFObjectFile> COFFObj;
+
+  // MB owns the file contents when the file was read from disk; MBRef
+  // refers to the contents in either case.
+  std::unique_ptr<MemoryBuffer> MB;
+  MemoryBufferRef MBRef;
+  StringRef Directives;
+  llvm::BumpPtrAllocator Alloc;
+
+  // List of all chunks defined by this file. The first chunks
+  // represent sections, which may be followed by other non-section
+  // chunks such as common symbols.
+  std::vector<Chunk *> Chunks;
+
+  // This vector contains the same chunks as Chunks, but they are
+  // indexed such that you can get a SectionChunk by section
+  // index. Nonexistent section indices are filled with null pointers.
+  // (Because section number is 1-based, the first slot is always a
+  // null pointer.)
+  std::vector<Chunk *> SparseChunks;
+
+  // List of all symbols referenced or defined by this file.
+  std::vector<SymbolBody *> SymbolBodies;
+
+  // This vector contains the same symbols as SymbolBodies, but they
+  // are indexed such that you can get a SymbolBody by symbol
+  // index. Nonexistent indices (which are occupied by auxiliary
+  // symbols in the real symbol table) are filled by null pointers.
+  std::vector<SymbolBody *> SparseSymbolBodies;
+};
+
+// This type represents import library members that contain DLL names
+// and symbols exported from the DLLs. See Microsoft PE/COFF spec. 7
+// for details about the format.
+class ImportFile : public InputFile {
+public:
+  explicit ImportFile(MemoryBufferRef M) : InputFile(ImportKind), MBRef(M) {}
+  static bool classof(const InputFile *F) { return F->kind() == ImportKind; }
+  StringRef getName() override { return MBRef.getBufferIdentifier(); }
+  std::vector<SymbolBody *> &getSymbols() override { return SymbolBodies; }
+
+private:
+  std::error_code parse() override;
+
+  MemoryBufferRef MBRef;
+  std::vector<SymbolBody *> SymbolBodies;
+  llvm::BumpPtrAllocator Alloc;
+  StringAllocator StringAlloc;
+};
+
+} // namespace coff
+} // namespace lld
+
+#endif
Index: COFF/InputFiles.cpp
===================================================================
--- /dev/null
+++ COFF/InputFiles.cpp
@@ -0,0 +1,248 @@
+//===- InputFiles.cpp -----------------------------------------------------===//
+//
+//                             The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Chunks.h"
+#include "InputFiles.h"
+#include "Writer.h"
+#include "lld/Core/Error.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm::object;
+using namespace llvm::support::endian;
+using llvm::COFF::ImportHeader;
+using llvm::RoundUpToAlignment;
+using llvm::sys::fs::identify_magic;
+using llvm::sys::fs::file_magic;
+
+namespace lld {
+namespace coff {
+
+// Returns the part of Path after the last backslash, or Path itself if
+// it contains no backslash.
+static StringRef basename(StringRef Path) {
+  size_t Pos = Path.rfind('\\');
+  if (Pos == StringRef::npos)
+    return Path;
+  return Path.substr(Pos + 1);
+}
+
+// Returns the lowercased name, in the form "parent(member)" for archive
+// members.
+std::string InputFile::getShortName() {
+  StringRef Name = getName();
+  if (ParentName == "")
+    return Name.lower();
+  return StringRef((basename(ParentName) + "(" + basename(Name) + ")").str())
+      .lower();
+}
+
+std::error_code ArchiveFile::parse() {
+  // Get a memory buffer.
+  auto MBOrErr = MemoryBuffer::getFile(Name);
+  if (auto EC = MBOrErr.getError())
+    return EC;
+  MB = std::move(MBOrErr.get());
+
+  // Parse a memory buffer as an archive file.
+  auto ArchiveOrErr = Archive::create(MB->getMemBufferRef());
+  if (auto EC = ArchiveOrErr.getError())
+    return EC;
+  File = std::move(ArchiveOrErr.get());
+
+  // Allocate a buffer for CanBeDefined objects.
+  size_t BufSize = File->getNumberOfSymbols() * sizeof(CanBeDefined);
+  CanBeDefined *Buf =
+      (CanBeDefined *)Alloc.Allocate(BufSize, llvm::alignOf<CanBeDefined>());
+
+  // Read the symbol table to construct CanBeDefined objects.
+  uint32_t I = 0;
+  for (const Archive::Symbol &Sym : File->symbols()) {
+    // Skip a special symbol that exists in import library files.
+    if (Sym.getName() == "__NULL_IMPORT_DESCRIPTOR")
+      continue;
+    SymbolBodies.push_back(new (&Buf[I++]) CanBeDefined(this, Sym));
+  }
+  return std::error_code();
+}
+
+// Returns a buffer pointing to a member file containing a given symbol.
+ErrorOr<MemoryBufferRef> ArchiveFile::getMember(const Archive::Symbol *Sym) {
+  auto ItOrErr = Sym->getMember();
+  if (auto EC = ItOrErr.getError())
+    return EC;
+  Archive::child_iterator It = ItOrErr.get();
+
+  // Return an empty buffer if we have already returned the same buffer.
+  const char *StartAddr = It->getBuffer().data();
+  auto Pair = Seen.insert(StartAddr);
+  if (!Pair.second)
+    return MemoryBufferRef();
+
+  auto MBRefOrErr = It->getMemoryBufferRef();
+  if (auto EC = MBRefOrErr.getError())
+    return EC;
+  return MBRefOrErr.get();
+}
+
+std::error_code ObjectFile::parse() {
+  // MBRef is not initialized if this is not an archive member.
+  if (MBRef.getBuffer().empty()) {
+    auto MBOrErr = MemoryBuffer::getFile(Name);
+    if (auto EC = MBOrErr.getError())
+      return EC;
+    MB = std::move(MBOrErr.get());
+    MBRef = MB->getMemBufferRef();
+  }
+
+  // Parse a memory buffer as a COFF file.
+  auto BinOrErr = createBinary(MBRef);
+  if (auto EC = BinOrErr.getError())
+    return EC;
+  std::unique_ptr<Binary> Bin = std::move(BinOrErr.get());
+
+  if (!isa<COFFObjectFile>(Bin.get()))
+    return make_dynamic_error_code(Twine(Name) + " is not a COFF file.");
+  COFFObj.reset(cast<COFFObjectFile>(Bin.release()));
+
+  // Read section and symbol tables.
+  if (auto EC = initializeChunks())
+    return EC;
+  if (auto EC = initializeSymbols())
+    return EC;
+  return std::error_code();
+}
+
+SymbolBody *ObjectFile::getSymbolBody(uint32_t SymbolIndex) {
+  return SparseSymbolBodies[SymbolIndex]->getReplacement();
+}
+
+// Creates a SectionChunk for each section that is to be linked and
+// records the contents of the .drectve section.
+std::error_code ObjectFile::initializeChunks() {
+  uint32_t NumSections = COFFObj->getNumberOfSections();
+  Chunks.reserve(NumSections);
+  SparseChunks.resize(NumSections + 1);
+  for (uint32_t I = 1; I < NumSections + 1; ++I) {
+    const coff_section *Sec;
+    StringRef Name;
+    // The section name is not known yet at this point, so identify the
+    // section by its index in the message.
+    if (auto EC = COFFObj->getSection(I, Sec))
+      return make_dynamic_error_code(Twine("getSection failed: #") + Twine(I) +
+                                     ": " + EC.message());
+    if (auto EC = COFFObj->getSectionName(Sec, Name))
+      return make_dynamic_error_code(Twine("getSectionName failed: ") + Name +
+                                     ": " + EC.message());
+    if (Name == ".drectve") {
+      ArrayRef<uint8_t> Data;
+      if (auto EC = COFFObj->getSectionContents(Sec, Data))
+        return make_dynamic_error_code(Twine("getSectionContents failed: ") +
+                                       Name + ": " + EC.message());
+      Directives = StringRef((char *)Data.data(), Data.size()).trim();
+      continue;
+    }
+    if (Name.startswith(".debug"))
+      continue;
+    if (Sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE)
+      continue;
+    auto *C = new (Alloc) SectionChunk(this, Sec, I);
+    Chunks.push_back(C);
+    SparseChunks[I] = C;
+  }
+  return std::error_code();
+}
+
+// Creates SymbolBody objects for the symbols in the symbol table.
+std::error_code ObjectFile::initializeSymbols() {
+  uint32_t NumSymbols = COFFObj->getNumberOfSymbols();
+  SymbolBodies.reserve(NumSymbols);
+  SparseSymbolBodies.resize(NumSymbols);
+  int32_t LastSectionNumber = 0;
+  for (uint32_t I = 0; I < NumSymbols; ++I) {
+    // Get a COFFSymbolRef object.
+    auto SymOrErr = COFFObj->getSymbol(I);
+    if (auto EC = SymOrErr.getError())
+      return make_dynamic_error_code(Twine("broken object file: ") + Name +
+                                     ": " + EC.message());
+    COFFSymbolRef Sym = SymOrErr.get();
+
+    // Get a symbol name.
+    StringRef SymbolName;
+    if (auto EC = COFFObj->getSymbolName(Sym, SymbolName))
+      return make_dynamic_error_code(Twine("broken object file: ") + Name +
+                                     ": " + EC.message());
+    // Skip special symbols.
+    if (SymbolName == "@comp.id" || SymbolName == "@feat.00")
+      continue;
+
+    // The first auxiliary record, if any, immediately follows the symbol.
+    const void *AuxP = nullptr;
+    if (Sym.getNumberOfAuxSymbols()) {
+      auto AuxOrErr = COFFObj->getSymbol(I + 1);
+      if (auto EC = AuxOrErr.getError())
+        return make_dynamic_error_code(Twine("broken object file: ") + Name +
+                                       ": " + EC.message());
+      AuxP = AuxOrErr->getRawPtr();
+    }
+    bool IsFirst = (LastSectionNumber != Sym.getSectionNumber());
+
+    SymbolBody *Body = createSymbolBody(SymbolName, Sym, AuxP, IsFirst);
+    if (Body) {
+      SymbolBodies.push_back(Body);
+      SparseSymbolBodies[I] = Body;
+    }
+    I += Sym.getNumberOfAuxSymbols();
+    LastSectionNumber = Sym.getSectionNumber();
+  }
+  return std::error_code();
+}
+
+// Creates a SymbolBody for Sym. AuxP points to the first auxiliary
+// record of the symbol or is null. IsFirst is true if the previous
+// symbol had a different section number. Returns null if the symbol
+// does not belong to any chunk of this file.
+SymbolBody *ObjectFile::createSymbolBody(StringRef Name, COFFSymbolRef Sym,
+                                         const void *AuxP, bool IsFirst) {
+  // All symbols are allocated from the per-file allocator.
+  if (Sym.isUndefined())
+    return new (Alloc) Undefined(Name);
+  if (Sym.isCommon()) {
+    Chunk *C = new (Alloc) CommonChunk(Sym);
+    Chunks.push_back(C);
+    return new (Alloc) DefinedRegular(this, Name, Sym, C);
+  }
+  if (Sym.getSectionNumber() == -1) {
+    return new (Alloc) DefinedAbsolute(Name, Sym.getValue());
+  }
+  if (Sym.isWeakExternal()) {
+    auto *Aux = (const coff_aux_weak_external *)AuxP;
+    return new (Alloc) Undefined(Name, &SparseSymbolBodies[Aux->TagIndex]);
+  }
+
+  // Other negative section numbers (e.g. debug symbols) and section
+  // numbers past the end of the section table have no chunk; do not
+  // index SparseChunks with them.
+  int32_t SectionNumber = Sym.getSectionNumber();
+  if (SectionNumber < 0 || size_t(SectionNumber) >= SparseChunks.size())
+    return nullptr;
+
+  Chunk *C = SparseChunks[SectionNumber];
+  if (!C)
+    return nullptr;
+
+  // The first symbol of a section may carry a section definition record
+  // that names a parent section; register C as its associative child.
+  if (IsFirst && AuxP) {
+    auto *Aux = (coff_aux_section_definition *)AuxP;
+    size_t ParentIndex = Aux->getNumber(Sym.isBigObj());
+    if (ParentIndex < SparseChunks.size())
+      if (auto *Parent = (SectionChunk *)SparseChunks[ParentIndex])
+        Parent->addAssociative((SectionChunk *)C);
+  }
+  return new (Alloc) DefinedRegular(this, Name, Sym, C);
+}
+
+std::error_code
ImportFile::parse() { + const char *Buf = MBRef.getBufferStart(); + const char *End = MBRef.getBufferEnd(); + + // The size of the string that follows the header. + uint32_t DataSize = read32le(Buf + offsetof(ImportHeader, SizeOfData)); + + // Check if the total size is valid. + if (size_t(End - Buf) != sizeof(ImportHeader) + DataSize) + return make_dynamic_error_code("broken import library"); + + StringRef Name = StringAlloc.save(StringRef(Buf + sizeof(ImportHeader))); + StringRef ImpName = StringAlloc.save(Twine("__imp_") + Name); + StringRef DLLName(Buf + sizeof(ImportHeader) + Name.size() + 1); + auto *ImpSym = new (Alloc) DefinedImportData(DLLName, ImpName, Name); + SymbolBodies.push_back(ImpSym); + + uint16_t TypeInfo = read16le(Buf + offsetof(ImportHeader, TypeInfo)); + int Type = TypeInfo & 0x3; + if (Type == llvm::COFF::IMPORT_CODE) + SymbolBodies.push_back(new (Alloc) DefinedImportFunc(Name, ImpSym)); + return std::error_code(); +} +} +} Index: COFF/Memory.h =================================================================== --- /dev/null +++ COFF/Memory.h @@ -0,0 +1,42 @@ +//===- Memory.h -----------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_COFF_MEMORY_H +#define LLD_COFF_MEMORY_H + +#include "lld/Core/LLVM.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Allocator.h" +#include + +namespace lld { +namespace coff { + +class StringAllocator { +public: + // Returns a null-terminated copy of a string. 
+ StringRef save(StringRef S) { + char *P = Alloc.Allocate(S.size() + 1); + memcpy(P, S.data(), S.size()); + P[S.size()] = '\0'; + return StringRef(P, S.size()); + } + + StringRef save(Twine S) { return save(StringRef(S.str())); } + StringRef save(const char *S) { return save(StringRef(S)); } + +private: + llvm::BumpPtrAllocator Alloc; +}; + +} // namespace coff +} // namespace lld + +#endif Index: COFF/Options.td =================================================================== --- /dev/null +++ COFF/Options.td @@ -0,0 +1,120 @@ +include "llvm/Option/OptParser.td" + +// link.exe accepts options starting with either a dash or a slash. + +// Flag that takes no arguments. +class F : Flag<["/", "-", "-?"], name>; + +// Flag that takes one argument after ":". +class P : + Joined<["/", "-", "-?"], name#":">, HelpText; + +// Boolean flag suffixed by ":no". +multiclass B { + def "" : F; + def _no : F, HelpText; +} + +def alternatename : P<"alternatename", "Define weak alias">; +def base : P<"base", "Base address of the program">; +def defaultlib : P<"defaultlib", "Add the library to the list of input files">; +def nodefaultlib : P<"nodefaultlib", "Remove a default library">; +def disallowlib : Joined<["/", "-", "-?"], "disallowlib:">, Alias; +def entry : P<"entry", "Name of entry point symbol">; +// No help text because /failifmismatch is not intended to be used by the user. 
+def export : P<"export", "Export a function">; +def failifmismatch : P<"failifmismatch", "">; +def heap : P<"heap", "Size of the heap">; +def align : P<"align", "Section alignment">; +def libpath : P<"libpath", "Additional library search path">; +def mllvm : P<"mllvm", "Options to pass to LLVM">; +def out : P<"out", "Path to file to write output">; +def stack : P<"stack", "Size of the stack">; +def machine : P<"machine", "Specify target platform">; +def version : P<"version", "Specify a version number in the PE header">; +def merge : P<"merge", "Combine sections">; +def section : P<"section", "Specify section attributes">; +def subsystem : P<"subsystem", "Specify subsystem">; +def stub : P<"stub", "Specify DOS stub file">; +def opt : P<"opt", "Control optimizations">; +def implib : P<"implib", "Import library name">; +def delayload : P<"delayload", "Delay loaded DLL name">; +def pdb : P<"pdb", "PDB file path">; + +def manifest : F<"manifest">; +def manifest_colon : P<"manifest", "Create manifest file">; +def manifestuac : P<"manifestuac", "User access control">; +def manifestfile : P<"manifestfile", "Manifest file path">; +def manifestdependency : P<"manifestdependency", + "Attributes for in manifest file">; + +// We cannot use multiclass P because class name "incl" is different +// from its command line option name. We do this because "include" is +// a reserved keyword in tablegen. +def incl : Joined<["/", "-"], "include:">, + HelpText<"Force symbol to be added to symbol table as undefined one">; + +// "def" is also a keyword. 
+def deffile : Joined<["/", "-"], "def:">, + HelpText<"Use module-definition file">; + +def nodefaultlib_all : F<"nodefaultlib">; +def noentry : F<"noentry">; +def dll : F<"dll">; +def verbose : F<"verbose">; +def debug : F<"debug">; +def swaprun_cd : F<"swaprun:cd">; +def swaprun_net : F<"swaprun:net">; +def profile : F<"profile">; + +def force : F<"force">, + HelpText<"Allow undefined symbols when creating executables">; +def force_unresolved : F<"force:unresolved">; + +defm nxcompat : B<"nxcompat", "Disable data execution provention">; +defm largeaddressaware : B<"largeaddressaware", "Disable large addresses">; +defm allowbind: B<"allowbind", "Disable DLL binding">; +defm fixed : B<"fixed", "Enable base relocations">; +defm tsaware : B<"tsaware", "Create non-Terminal Server aware executable">; +defm allowisolation : B<"allowisolation", "Set NO_ISOLATION bit">; +defm dynamicbase : B<"dynamicbase", + "Disable address space layout randomization">; +defm safeseh : B<"safeseh", "Produce an image with Safe Exception Handler">; +defm highentropyva : B<"highentropyva", "Set HIGH_ENTROPY_VA bit">; + +def help : F<"help">; +def help_q : Flag<["/?", "-?"], "">, Alias; + +def DASH_DASH : Option<["--"], "", KIND_REMAINING_ARGS>; + +// Flag for debug +def lldmoduledeffile : Joined<["/", "-"], "lldmoduledeffile:">; + +//============================================================================== +// The flags below do nothing. They are defined only for link.exe compatibility. 
+//============================================================================== + +class QF : Joined<["/", "-", "-?"], name#":">; + +multiclass QB { + def "" : F; + def _no : F; +} + +def functionpadmin : F<"functionpadmin">; +def ignoreidl : F<"ignoreidl">; +def incremental : F<"incremental">; +def no_incremental : F<"incremental:no">; +def nologo : F<"nologo">; + +def delay : QF<"delay">; +def errorreport : QF<"errorreport">; +def idlout : QF<"idlout">; +def ignore : QF<"ignore">; +def maxilksize : QF<"maxilksize">; +def pdbaltpath : QF<"pdbaltpath">; +def tlbid : QF<"tlbid">; +def tlbout : QF<"tlbout">; +def verbose_all : QF<"verbose">; + +defm wx : QB<"wx">; Index: COFF/README.md =================================================================== --- /dev/null +++ COFF/README.md @@ -0,0 +1,194 @@ +The New PE/COFF Linker +====================== + +This directory contains an experimental linker for the PE/COFF file +format. Because the fundamental design of this port is different from +the other ports of the LLD, this port is separated to this directory. + +The other ports are based on the Atom model, in which symbols and +references are represented as vertices and edges of graphs. The port +in this directory is on the other hand based on sections. The aim is +simplicity and better performance. Our plan is to implement a linker +for the PE/COFF format based on a different idea, and then apply the +same idea to the ELF if proved to be effective. + +Overall Design +-------------- + +This is a list of important data types in this linker. + +* SymbolBody + + SymbolBody is a class for symbols, which may be created for symbols + in object files or in archive file headers. The linker may create + them out of nothing. + + There are mainly three types of SymbolBodies: Defined, Undefined, or + CanBeDefined. 
Defined symbols are for all symbols that are + considered as "resolved", including real defined symbols, COMDAT + symbols, common symbols, absolute symbols, linker-created symbols, + etc. Undefined symbols are for undefined symbols, which need to be + replaced by Defined symbols by the resolver. CanBeDefined symbols + represent symbols we found in archive file headers -- which can + turn into Defined symbols if we read archive members, but we + haven't done that yet. + +* Symbol + + Symbol is a pointer to a SymbolBody. There's only one Symbol for + each unique symbol name (this uniqueness is guaranteed by the symbol + table). Because SymbolBodies are created for each file + independently, there can be many SymbolBodies for the same + name. Thus, the relationship between Symbols and SymbolBodies is 1:N. + + The resolver keeps the Symbol's pointer to always point to the "best" + SymbolBody. Pointer mutation is the resolve operation in this + linker. + + SymbolBodies have pointers to their Symbols. That means you can + always find the best SymbolBody from any SymbolBody by following + pointers twice. This structure makes it very easy to find + replacements for symbols. For example, if you have an Undefined + SymbolBody, you can find a Defined SymbolBody for that symbol just + by going to its Symbol and then to SymbolBody, assuming the resolver + has successfully resolved all undefined symbols. + +* Chunk + + Chunk represents a chunk of data that will occupy space in an + output. They may be backed by sections of input files, but can be + created for something different, if they are for common or BSS + symbols. The linker may also create chunks out of nothing to append + additional data to an output. + + Chunks know about their size, how to copy their data to mmap'ed + outputs, and how to apply relocations to them. Specifically, + section-based chunks know how to read relocation tables and how to + apply them.
+ +* SymbolTable + + SymbolTable is basically a hash table from strings to Symbols, with + logic to resolve symbol conflicts. It resolves conflicts by symbol + type. For example, if we add Undefined and Defined symbols, the + symbol table will keep the latter. If we add Undefined and + CanBeDefined symbols, it will keep the latter. If we add + CanBeDefined and Undefined, it will keep the former, but it will + also trigger the CanBeDefined symbol to load the archive member to + actually resolve the symbol. + +* OutputSection + + OutputSection is a container of Chunks. A Chunk belongs to at most + one OutputSection. + +There are mainly three actors in this linker. + +* InputFile + + InputFile is a superclass for file readers. We have a different + subclass for each input file type, such as regular object files, + archive files, etc. They are responsible for creating and owning + SymbolBodies and Chunks. + +* Writer + + The writer is responsible for writing file headers and Chunks to a + file. It creates OutputSections, puts all Chunks into them, assigns + unique, non-overlapping addresses and file offsets to them, and then + writes them down to a file. + +* Driver + + The linking process is driven by the driver. The driver + + - processes command line options, + - creates a symbol table, + - creates an InputFile for each input file and puts all symbols in it + into the symbol table, + - checks that there are no remaining undefined symbols, + - creates a writer, + - and passes the symbol table to the writer to write the result to a + file. + +Performance +----------- + +Currently it's able to self-host on the Windows platform. It takes 1.2 +seconds to self-host on my Xeon 2580 machine, while the existing +Atom-based linker takes 5 seconds to self-host. We believe the +performance difference comes from simplification and optimizations we +made to the new port. Notable differences are listed below.
+ +* Reduced number of relocation table reads + + In the existing design, relocation tables are read from the beginning to + construct graphs because they consist of graph edges. In the new + design, they are not read until we actually apply relocations. + + This simplification has two benefits. One is that we don't create + additional objects for relocations but instead consume relocation + tables directly. The other is that it reduces the number of relocation + entries we have to read, because we won't read relocations for + dead-stripped COMDAT sections. Large C++ programs tend to consist of + lots of COMDAT sections. In the existing design, the time to process + relocation tables is linear in the size of the input. In this new model, it's + linear in the size of the output. + +* Reduced number of symbol table lookups + + Symbol table lookup can be a heavy operation because the number of + symbols can be very large and each symbol name can be very long + (think of C++ mangled symbols -- the time to compute a hash value for a + string is linear in its length.) + + We look up the symbol table exactly once for each symbol in the + new design. This is, I believe, the minimum possible number. This is + achieved by the separation of Symbol and SymbolBody. Once you get a + pointer to a Symbol by looking up the symbol table, you can always + get the latest symbol resolution result by just dereferencing a + pointer. (I'm not sure if the idea is new to the linker. At least, + all other linkers I've investigated so far seem to look up hash + tables or sets more than once for each new symbol, but I may be + wrong.) + +* Reduced number of file visits + + The symbol table implements the Windows linker semantics. We treat + the symbol table as a bucket of all known symbols, including symbols + in archive file headers. We put all symbols into one bucket as we + visit new files. That means we visit each file only once.
+ + This is different from the Unix linker semantics, in which we only + keep undefined symbols and visit each file one by one until we + resolve all undefined symbols. In the Unix model, we have to visit + archive files many times if there are circular dependencies between + archives. + +* Avoiding creating additional objects or copying data + + The data structures described in the previous section are all thin + wrappers for classes that LLVM libObject provides. We avoid copying + data from libObject's objects to our objects. We read much less data + than before. For example, we don't read symbol values until we apply + relocations because these values are not relevant to symbol + resolution. Again, COMDAT symbols may be discarded during symbol + resolution, so reading their attributes too early could result in + wasted work. We use underlying objects directly where doing so makes + sense. + +Parallelism +----------- + +The above-mentioned data structures are also chosen with +multi-threading in mind. It should be relatively easy to make the +symbol table a concurrent hash map, so that we let multiple workers +work on the symbol table concurrently. Symbol resolution in this design is +a single pointer mutation, which allows the resolver to work concurrently +in a lock-free manner using atomic pointer compare-and-swap. + +It should also be easy to apply relocations and write chunks concurrently. + +We created an experimental multi-threaded linker using the Microsoft +ConcRT concurrency library, and it was able to link itself in 0.5 +seconds, so we think the design is promising. Index: COFF/SymbolTable.h =================================================================== --- /dev/null +++ COFF/SymbolTable.h @@ -0,0 +1,77 @@ +//===- SymbolTable.h ------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_COFF_SYMBOL_TABLE_H +#define LLD_COFF_SYMBOL_TABLE_H + +#include "InputFiles.h" +#include "Memory.h" +#include "llvm/Support/Allocator.h" +#include + +namespace lld { +namespace coff { + +// SymbolTable is a bucket of all known symbols, including defined, +// undefined, or can-be-defined symbols (the last one is symbols in +// archive files whose archive members are not yet loaded). +// +// We put all symbols of all files to a SymbolTable, and the +// SymbolTable selects the "best" symbols if there are name +// conflicts. For example, obviously, a defined symbol is better than +// an undefined symbol. Or, if there's a conflict between a +// can-be-defined and a undefined, it'll read an archive member to +// read a real definition to replace the can-be-defined symbol. The +// logic is implemented in resolve(). +class SymbolTable { +public: + SymbolTable(); + + std::error_code addFile(std::unique_ptr File); + + // Print an error message on undefined symbols. + bool reportRemainingUndefines(); + + // Returns a list of chunks of selected symbols. + std::vector getChunks(); + + // Returns a symbol for a given name. It's not guaranteed that the + // returned symbol actually has the same name (because of various + // mechanisms to allow aliases, a name can be resolved to a + // different symbol). Returns a nullptr if not found. + SymbolBody *find(StringRef Name); + + // Dump contents of the symbol table to stderr. + void dump(); + + // The writer needs to handle DLL import libraries specially in + // order to create the import descriptor table. 
+ std::vector> ImportFiles; + +private: + std::error_code addObject(ObjectFile *File); + std::error_code addArchive(ArchiveFile *File); + std::error_code addImport(ImportFile *File); + + std::error_code resolve(SymbolBody *Body); + std::error_code addMemberFile(CanBeDefined *Body); + void addInitialSymbol(SymbolBody *Body); + + std::unordered_map Symtab; + std::vector> ObjectFiles; + std::vector> ArchiveFiles; + std::vector> OwnedSymbols; + llvm::BumpPtrAllocator Alloc; + StringAllocator StringAlloc; +}; + +} // namespace pecoff +} // namespace lld + +#endif Index: COFF/SymbolTable.cpp =================================================================== --- /dev/null +++ COFF/SymbolTable.cpp @@ -0,0 +1,178 @@ +//===- SymbolTable.cpp ----------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Config.h" +#include "Driver.h" +#include "SymbolTable.h" +#include "lld/Core/Error.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +namespace lld { +namespace coff { + +SymbolTable::SymbolTable() { + addInitialSymbol(new DefinedAbsolute("__ImageBase", Config->ImageBase)); + addInitialSymbol(new Undefined(Config->EntryName)); +} + +void SymbolTable::addInitialSymbol(SymbolBody *Body) { + OwnedSymbols.push_back(std::unique_ptr(Body)); + Symtab[Body->getName()] = new (Alloc) Symbol(Body); +} + +std::error_code SymbolTable::addFile(std::unique_ptr File) { + if (auto EC = File->parse()) + return EC; + InputFile *FileP = File.release(); + if (auto *P = dyn_cast(FileP)) + return addObject(P); + if (auto *P = dyn_cast(FileP)) + return addArchive(P); + return addImport(cast(FileP)); +} + +std::error_code SymbolTable::addObject(ObjectFile *File) { + ObjectFiles.emplace_back(File); + for 
(SymbolBody *Body : File->getSymbols()) + if (Body->isExternal()) + if (auto EC = resolve(Body)) + return EC; + + // If an object file contains .drectve section, read it and add + // files listed in the section. + StringRef Dir = File->getDirectives(); + if (!Dir.empty()) { + std::vector> Libs; + if (auto EC = parseDirectives(Dir, &Libs, &StringAlloc)) + return EC; + for (std::unique_ptr &Lib : Libs) + addFile(std::move(Lib)); + } + return std::error_code(); +} + +std::error_code SymbolTable::addArchive(ArchiveFile *File) { + ArchiveFiles.emplace_back(File); + for (SymbolBody *Body : File->getSymbols()) + if (auto EC = resolve(Body)) + return EC; + return std::error_code(); +} + +std::error_code SymbolTable::addImport(ImportFile *File) { + ImportFiles.emplace_back(File); + for (SymbolBody *Body : File->getSymbols()) + if (auto EC = resolve(Body)) + return EC; + return std::error_code(); +} + +bool SymbolTable::reportRemainingUndefines() { + bool Ret = false; + for (auto &I : Symtab) { + Symbol *Sym = I.second; + auto *Undef = dyn_cast(Sym->Body); + if (!Undef) + continue; + if (SymbolBody *Alias = Undef->getWeakAlias()) { + // If an Undefined has a fallback, it'll be replaced with the + // fallback symbol. The StringBody object the Undefined has may + // be stale, so get the latest result by following the back + // pointer and then the forward poitner. + Sym->Body = Alias->getReplacement(); + if (!isa(Sym->Body)) { + llvm::errs() << "undefined symbol: " << Undef->getName() << "\n"; + Ret = true; + } + continue; + } + llvm::errs() << "undefined symbol: " << Undef->getName() << "\n"; + Ret = true; + } + return Ret; +} + +// This function resolves conflicts if there's an existing symbol with +// the same name. Decisions are made based on symbol type. +std::error_code SymbolTable::resolve(SymbolBody *New) { + // Find an existing Symbol or create and insert a new one. 
+ StringRef Name = New->getName(); + Symbol *&Sym = Symtab[Name]; + if (!Sym) { + Sym = new (Alloc) Symbol(New); + New->setBackref(Sym); + return std::error_code(); + } + New->setBackref(Sym); + + // compare() returns -1, 0, or 1 if the lhs symbol is less preferable, + // equivalent (conflicting), or more preferable, respectively. + SymbolBody *Existing = Sym->Body; + int comp = Existing->compare(New); + if (comp < 0) + Sym->Body = New; + if (comp == 0) + return make_dynamic_error_code(Twine("duplicate symbol: ") + Name); + + // If we have an Undefined symbol for a CanBeDefined symbol, we need + // to read an archive member to replace the CanBeDefined symbol with + // a Defined symbol. + if (isa(Existing) || isa(New)) + if (auto *B = dyn_cast(Sym->Body)) + return addMemberFile(B); + return std::error_code(); +} + +// Reads an archive member file pointed by a given symbol. +std::error_code SymbolTable::addMemberFile(CanBeDefined *Body) { + auto FileOrErr = Body->getMember(); + if (auto EC = FileOrErr.getError()) + return EC; + std::unique_ptr File = std::move(FileOrErr.get()); + + // getMember returns an empty buffer if the member was already + // read from the library. 
+ if (!File) + return std::error_code(); + if (Config->Verbose) + llvm::dbgs() << "Loaded " << File->getShortName() << " for " + << Body->getName() << "\n"; + return addFile(std::move(File)); +} + +std::vector SymbolTable::getChunks() { + std::vector Res; + for (std::unique_ptr &File : ObjectFiles) { + std::vector &V = File->getChunks(); + Res.insert(Res.end(), V.begin(), V.end()); + } + return Res; +} + +SymbolBody *SymbolTable::find(StringRef Name) { + auto It = Symtab.find(Name); + if (It == Symtab.end()) + return nullptr; + return It->second->Body; +} + +void SymbolTable::dump() { + for (auto &P : Symtab) { + StringRef Name = P.first; + Symbol *Ref = P.second; + if (auto *Body = dyn_cast(Ref->Body)) + llvm::dbgs() << Twine::utohexstr(Config->ImageBase + Body->getRVA()) + << " " << Body->getName() << "\n"; + } +} + +} // namespace coff +} // namespace lld Index: COFF/Symbols.h =================================================================== --- /dev/null +++ COFF/Symbols.h @@ -0,0 +1,255 @@ +//===- Symbols.h ----------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_COFF_SYMBOLS_H +#define LLD_COFF_SYMBOLS_H + +#include "Chunks.h" +#include "Config.h" +#include "lld/Core/LLVM.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/COFF.h" +#include +#include + +using llvm::object::Archive; +using llvm::object::COFFSymbolRef; + +namespace lld { +namespace coff { + +class ArchiveFile; +class InputFile; +class ObjectFile; +class SymbolBody; + +// A real symbol object, SymbolBody, is usually accessed indirectly +// through a Symbol. There's always one Symbol for each symbol name. +// The resolver updates SymbolBody pointers as it resolves symbols. 
+struct Symbol { + explicit Symbol(SymbolBody *P) : Body(P) {} + SymbolBody *Body; +}; + +// The base class for real symbol classes. +class SymbolBody { +public: + enum Kind { + DefinedRegularKind, + DefinedAbsoluteKind, + DefinedImportDataKind, + DefinedImportFuncKind, + UndefinedKind, + CanBeDefinedKind, + }; + + Kind kind() const { return SymbolKind; } + virtual ~SymbolBody() {} + + // Returns true if this is an external symbol. + virtual bool isExternal() { return true; } + + // Returns the symbol name. + StringRef getName() { return Name; } + + // A SymbolBody has a backreference to a Symbol. Originally they are + // doubly-linked. A backreference will never change. But the pointer + // in the Symbol may be mutated by the resolver. If you have a + // pointer P to a SymbolBody and are not sure whether the resolver + // has chosen the object among other objects having the same name, + // you can access P->getSymbol()->Body to get the resolver's result. + void setBackref(Symbol *P) { Backref = P; } + SymbolBody *getReplacement() { return Backref ? Backref->Body : this; } + + // Decides which symbol should "win" in the symbol table, this or + // the Other. Returns 1 if this wins, -1 if the Other wins, or 0 if + // they are duplicate (conflicting) symbols. + virtual int compare(SymbolBody *Other) = 0; + +protected: + SymbolBody(Kind K, StringRef N) : SymbolKind(K), Name(N) {} + +private: + const Kind SymbolKind; + StringRef Name; + Symbol *Backref = nullptr; +}; + +// The base class for any defined symbols, including absolute symbols, +// etc. +class Defined : public SymbolBody { +public: + Defined(Kind K, StringRef Name) : SymbolBody(K, Name) {} + + static bool classof(const SymbolBody *S) { + Kind K = S->kind(); + return DefinedRegularKind <= K && K <= DefinedImportFuncKind; + } + + // Returns the RVA (relative virtual address) of this symbol. The + // writer sets and uses RVAs. 
+ virtual uint64_t getRVA() = 0; + + // Returns the file offset of this symbol in the final executable. + // The writer uses this information to apply relocations. + virtual uint64_t getFileOff() = 0; + + // Called by the garbage collector. All Defined subclasses should + // know how to call markLive to dependent symbols. + virtual void markLive() {} + + int compare(SymbolBody *Other) override; +}; + +// Regular defined symbols read from object file symbol tables. +class DefinedRegular : public Defined { +public: + DefinedRegular(ObjectFile *F, StringRef Name, COFFSymbolRef S, Chunk *C) + : Defined(DefinedRegularKind, Name), File(F), Sym(S), Data(C) {} + + static bool classof(const SymbolBody *S) { + return S->kind() == DefinedRegularKind; + } + + uint64_t getRVA() override { return Data->getRVA() + Sym.getValue(); } + bool isExternal() override { return Sym.isExternal(); } + void markLive() override { Data->markLive(); } + uint64_t getFileOff() override { return Data->getFileOff() + Sym.getValue(); } + bool isCOMDAT() const { return Data->isCOMDAT(); } + + // Returns true if this is a common symbol. + bool isCommon() const { return Sym.isCommon(); } + uint32_t getCommonSize() const { return Sym.getValue(); } + +private: + ObjectFile *File; + COFFSymbolRef Sym; + Chunk *Data; +}; + +// Absolute symbols. +class DefinedAbsolute : public Defined { +public: + DefinedAbsolute(StringRef Name, uint64_t VA) + : Defined(DefinedAbsoluteKind, Name), RVA(VA - Config->ImageBase) {} + + static bool classof(const SymbolBody *S) { + return S->kind() == DefinedAbsoluteKind; + } + + uint64_t getRVA() override { return RVA; } + uint64_t getFileOff() override { llvm_unreachable("internal error"); } + +private: + uint64_t RVA; +}; + +// This class represents a symbol imported from a DLL. This has two +// names for internal use and external use. The former is used for +// name resolution, and the latter is used for the import descriptor +// table in an output. 
The former has "__imp_" prefix. +class DefinedImportData : public Defined { +public: + DefinedImportData(StringRef D, StringRef ImportName, StringRef ExportName) + : Defined(DefinedImportDataKind, ImportName), DLLName(D), + ExpName(ExportName) {} + + static bool classof(const SymbolBody *S) { + return S->kind() == DefinedImportDataKind; + } + + uint64_t getRVA() override { return Location->getRVA(); } + uint64_t getFileOff() override { return Location->getFileOff(); } + StringRef getDLLName() { return DLLName; } + StringRef getExportName() { return ExpName; } + void setLocation(Chunk *AddressTable) { Location = AddressTable; } + +private: + StringRef DLLName; + StringRef ExpName; + Chunk *Location = nullptr; +}; + +// This class represents a symbol defined in an archive file. It is +// created from an archive file header, and it knows how to load an +// object file from an archive to replace itself with a defined +// symbol. If the resolver finds both Undefined and CanBeDefined for +// the same name, it will ask the CanBeDefined to load a file. +class CanBeDefined : public SymbolBody { +public: + CanBeDefined(ArchiveFile *F, const Archive::Symbol S) + : SymbolBody(CanBeDefinedKind, S.getName()), File(F), Sym(S) {} + + static bool classof(const SymbolBody *S) { + return S->kind() == CanBeDefinedKind; + } + + // Returns an object file for this symbol, or a nullptr if the file + // was already returned. + ErrorOr> getMember(); + + int compare(SymbolBody *Other) override; + +private: + ArchiveFile *File; + const Archive::Symbol Sym; +}; + +// Undefined symbols. +class Undefined : public SymbolBody { +public: + explicit Undefined(StringRef Name, SymbolBody **S = nullptr) + : SymbolBody(UndefinedKind, Name), Alias(S) {} + + static bool classof(const SymbolBody *S) { + return S->kind() == UndefinedKind; + } + + // An undefined symbol can have a fallback symbol which gives an + // undefined symbol a second chance if it would remain undefined.
+ // If it remains undefined, it'll be replaced with whatever the + // Alias pointer points to. + SymbolBody *getWeakAlias() { return Alias ? *Alias : nullptr; } + + int compare(SymbolBody *Other) override; + +private: + SymbolBody **Alias; +}; + +// Windows-specific classes. + +// This class represents a symbol for a jump table entry which jumps +// to a function in a DLL. Linkers are supposed to create such symbols +// without "__imp_" prefix for all function symbols exported from +// DLLs, so that you can call DLL functions as regular functions with +// a regular name. A function pointer is given as a DefinedImportData. +class DefinedImportFunc : public Defined { +public: + DefinedImportFunc(StringRef Name, DefinedImportData *S) + : Defined(DefinedImportFuncKind, Name), ImpSymbol(S), Data(S) {} + + static bool classof(const SymbolBody *S) { + return S->kind() == DefinedImportFuncKind; + } + + uint64_t getRVA() override { return Data.getRVA(); } + uint64_t getFileOff() override { return Data.getFileOff(); } + Chunk *getChunk() { return &Data; } + +private: + DefinedImportData *ImpSymbol; + ImportFuncChunk Data; +}; + +} // namespace coff +} // namespace lld + +#endif Index: COFF/Symbols.cpp =================================================================== --- /dev/null +++ COFF/Symbols.cpp @@ -0,0 +1,93 @@ +//===- Symbols.cpp --------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#include "InputFiles.h" +#include "Symbols.h" +#include "lld/Core/Error.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm::object; +using llvm::sys::fs::identify_magic; +using llvm::sys::fs::file_magic; + +namespace lld { +namespace coff { + +// Returns 1, 0 or -1 if this symbol wins over, ties with, or loses +// to the Other in the symbol table, respectively. +int Defined::compare(SymbolBody *Other) { + if (!isa(Other)) + return 1; + auto *X = dyn_cast(this); + auto *Y = dyn_cast(Other); + if (!X || !Y) + return 0; + + // Common symbols are weaker than other types of defined symbols. + if (X->isCommon() && Y->isCommon()) + return (X->getCommonSize() < Y->getCommonSize()) ? -1 : 1; + if (X->isCommon()) + return -1; + if (Y->isCommon()) + return 1; + + if (X->isCOMDAT() && Y->isCOMDAT()) + return 1; + return 0; +} + +int CanBeDefined::compare(SymbolBody *Other) { + if (isa(Other)) + return -1; + + // Undefined symbols with weak aliases will turn into defined + // symbols if they remain undefined, so we don't need to resolve + // such symbols. + if (auto *U = dyn_cast(Other)) + if (U->getWeakAlias()) + return -1; + return 1; +} + +int Undefined::compare(SymbolBody *Other) { + if (isa(Other)) + return -1; + if (isa(Other)) + return getWeakAlias() ? 1 : -1; + if (cast(Other)->getWeakAlias()) + return -1; + return 1; +} + +ErrorOr> CanBeDefined::getMember() { + auto MBRefOrErr = File->getMember(&Sym); + if (auto EC = MBRefOrErr.getError()) + return EC; + MemoryBufferRef MBRef = MBRefOrErr.get(); + + // getMember returns an empty buffer if the member was already + // read from the library.
+ if (MBRef.getBuffer().empty()) + return nullptr; + + file_magic Magic = identify_magic(MBRef.getBuffer()); + if (Magic == file_magic::coff_import_library) + return llvm::make_unique(MBRef); + + if (Magic != file_magic::coff_object) + return make_dynamic_error_code("unknown file type"); + + std::unique_ptr Obj(new ObjectFile(MBRef.getBufferIdentifier(), MBRef)); + Obj->setParentName(File->getName()); + return std::move(Obj); +} +} +} Index: COFF/Writer.h =================================================================== --- /dev/null +++ COFF/Writer.h @@ -0,0 +1,106 @@ +//===- Writer.h -----------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_COFF_WRITER_H +#define LLD_COFF_WRITER_H + +#include "InputFiles.h" +#include "SymbolTable.h" +#include "llvm/Support/FileOutputBuffer.h" +#include +#include + +namespace lld { +namespace coff { + +// Mask for section types (code, data or bss) and permissions +// (writable, readable or executable). +const uint32_t PermMask = 0xF00000F0; + +// OutputSection represents a section in an output file. It's a +// container of chunks. OutputSection and Chunk have a 1:N relationship. +// Chunks cannot belong to more than one OutputSection. The writer +// creates multiple OutputSections and assigns them unique, +// non-overlapping file offsets and RVAs.
+class OutputSection { +public: + OutputSection(StringRef Name, uint32_t SectionIndex); + void setRVA(uint64_t); + void setFileOffset(uint64_t); + void addChunk(Chunk *C); + StringRef getName() { return Name; } + uint64_t getSectionIndex() { return SectionIndex; } + std::vector &getChunks() { return Chunks; } + + const llvm::object::coff_section getHeader() { return Header; } + void addPermissions(uint32_t C); + uint32_t getPermissions() { return Header.Characteristics & PermMask; } + uint32_t getCharacteristics() { return Header.Characteristics; } + uint64_t getRVA() { return Header.VirtualAddress; } + uint64_t getFileOff() { return Header.PointerToRawData; } + + // Returns the size of this section in an executable memory image. + // This may be smaller than the raw size (the raw size is a multiple + // of the disk sector size, so there may be padding at the end), or may + // be larger (if that's the case, the loader reserves space after the + // end of the raw data). + uint64_t getVirtualSize() { return Header.VirtualSize; } + + // Returns the size of the section in the output file. + uint64_t getRawSize() { return Header.SizeOfRawData; } + +private: + llvm::object::coff_section Header; + StringRef Name; + uint32_t SectionIndex; + std::vector Chunks; +}; + +// The writer writes a SymbolTable result to a file.
+class Writer { +public: + explicit Writer(SymbolTable *T) : Symtab(T) {} + std::error_code write(StringRef Path); + +private: + void markLive(); + void createSections(); + void createImportTables(); + void assignAddresses(); + void removeEmptySections(); + std::error_code openFile(StringRef OutputPath); + void writeHeader(); + void writeSections(); + void applyRelocations(); + + OutputSection *findSection(StringRef Name); + OutputSection *createSection(StringRef Name); + + uint32_t getSizeOfInitializedData(); + std::map> binImports(); + + SymbolTable *Symtab; + std::unique_ptr Buffer; + std::vector> OutputSections; + Chunk *ImportAddressTable = nullptr; + uint32_t ImportDirectoryTableSize = 0; + uint32_t ImportAddressTableSize = 0; + + Defined *Entry = nullptr; + uint64_t FileSize = 0; + uint64_t SizeOfImage = 0; + uint64_t SizeOfHeaders = 0; + + std::vector> Chunks; +}; + +} // namespace coff +} // namespace lld + +#endif Index: COFF/Writer.cpp =================================================================== --- /dev/null +++ COFF/Writer.cpp @@ -0,0 +1,380 @@ +//===- Writer.cpp ---------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#include "Config.h" +#include "Writer.h" +#include "lld/Core/Error.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include + +using namespace llvm; +using namespace llvm::object; +using namespace llvm::COFF; + +static const int PageSize = 4096; +static const int FileAlignment = 512; +static const int SectionAlignment = 4096; +static const int DOSStubSize = 64; +static const int NumberfOfDataDirectory = 16; +static const int HeaderSize = + DOSStubSize + sizeof(PEMagic) + sizeof(coff_file_header) + + sizeof(pe32plus_header) + sizeof(data_directory) * NumberfOfDataDirectory; + +namespace lld { +namespace coff { + +OutputSection::OutputSection(StringRef N, uint32_t SI) + : Name(N), SectionIndex(SI) { + memset(&Header, 0, sizeof(Header)); + strncpy(Header.Name, Name.data(), std::min(Name.size(), size_t(8))); +} + +void OutputSection::setRVA(uint64_t RVA) { + Header.VirtualAddress = RVA; + for (Chunk *C : Chunks) + C->setRVA(C->getRVA() + RVA); +} + +void OutputSection::setFileOffset(uint64_t Off) { + if (Header.SizeOfRawData == 0) + return; + Header.PointerToRawData = Off; + for (Chunk *C : Chunks) + C->setFileOff(C->getFileOff() + Off); +} + +void OutputSection::addChunk(Chunk *C) { + Chunks.push_back(C); + uint64_t Off = Header.VirtualSize; + Off = RoundUpToAlignment(Off, C->getAlign()); + C->setRVA(Off); + C->setFileOff(Off); + Off += C->getSize(); + Header.VirtualSize = Off; + if (C->hasData()) + Header.SizeOfRawData = RoundUpToAlignment(Off, FileAlignment); +} + +void OutputSection::addPermissions(uint32_t C) { + Header.Characteristics = Header.Characteristics | (C & PermMask); +} + +static StringRef dropDollar(StringRef S) { return S.substr(0,
S.find('$')); } + +void Writer::markLive() { + Entry = cast(Symtab->find(Config->EntryName)); + Entry->markLive(); + for (Chunk *C : Symtab->getChunks()) + if (C->isRoot()) + C->markLive(); +} + +void Writer::createSections() { + std::map> Map; + for (Chunk *C : Symtab->getChunks()) { + if (!C->isLive()) { + if (Config->Verbose) + C->printDiscardedMessage(); + continue; + } + // '$' and all following characters in input section names are + // discarded when determining the output section. So, .text$foo + // contributes to .text, for example. See PE/COFF spec 3.2. + Map[dropDollar(C->getSectionName())].push_back(C); + } + + // Input sections are ordered by their names including '$' parts, + // which gives you some control over the output layout. + auto Comp = [](Chunk *A, Chunk *B) { + return A->getSectionName() < B->getSectionName(); + }; + for (auto &P : Map) { + StringRef SectionName = P.first; + std::vector &Chunks = P.second; + std::stable_sort(Chunks.begin(), Chunks.end(), Comp); + auto Sec = + llvm::make_unique(SectionName, OutputSections.size()); + for (Chunk *C : Chunks) { + C->setOutputSection(Sec.get()); + Sec->addChunk(C); + Sec->addPermissions(C->getPermissions()); + } + OutputSections.push_back(std::move(Sec)); + } +} + +std::map> Writer::binImports() { + // Group DLL-imported symbols by DLL name because that's how symbols + // are laid out in the import descriptor table. + std::map> Res; + OutputSection *Text = createSection(".text"); + for (std::unique_ptr &P : Symtab->ImportFiles) { + for (SymbolBody *B : P->getSymbols()) { + if (auto *Import = dyn_cast(B)) { + Res[Import->getDLLName()].push_back(Import); + continue; + } + // Linker-created function thunks for DLL symbols are added to + // the .text section. + Text->addChunk(cast(B)->getChunk()); + } + } + + // Sort symbols by name for each group.
+ auto Comp = [](DefinedImportData *A, DefinedImportData *B) { + return A->getName() < B->getName(); + }; + for (auto &P : Res) { + std::vector &V = P.second; + std::sort(V.begin(), V.end(), Comp); + } + return Res; +} + +// Creates the .idata section contents. +void Writer::createImportTables() { + if (Symtab->ImportFiles.empty()) + return; + + std::vector Tabs; + for (auto &P : binImports()) { + StringRef DLLName = P.first; + std::vector &Imports = P.second; + Tabs.emplace_back(DLLName, Imports); + } + OutputSection *Idata = createSection(".idata"); + size_t NumChunks = Idata->getChunks().size(); + + // Add the directory tables. + for (ImportTable &T : Tabs) + Idata->addChunk(T.DirTab); + Idata->addChunk(new NullChunk(sizeof(ImportDirectoryTableEntry))); + ImportDirectoryTableSize = (Tabs.size() + 1) * sizeof(ImportDirectoryTableEntry); + + // Add the import lookup tables. + for (ImportTable &T : Tabs) { + for (LookupChunk *C : T.LookupTables) + Idata->addChunk(C); + Idata->addChunk(new NullChunk(sizeof(uint64_t))); + } + + // Add the import address tables. Their contents are the same as the + // lookup tables. + for (ImportTable &T : Tabs) { + for (LookupChunk *C : T.AddressTables) + Idata->addChunk(C); + Idata->addChunk(new NullChunk(sizeof(uint64_t))); + ImportAddressTableSize += (T.AddressTables.size() + 1) * sizeof(uint64_t); + } + ImportAddressTable = Tabs[0].AddressTables[0]; + + // Add the hint name table. + for (ImportTable &T : Tabs) + for (HintNameChunk *C : T.HintNameTables) + Idata->addChunk(C); + + // Add DLL names. + for (ImportTable &T : Tabs) + Idata->addChunk(T.DLLName); + + // Claim ownership of all chunks in the .idata section. + for (size_t I = NumChunks, E = Idata->getChunks().size(); I < E; ++I) + Chunks.push_back(std::unique_ptr(Idata->getChunks()[I])); +} + +// The Windows loader doesn't seem to like empty sections, +// so we remove them if any.
+void Writer::removeEmptySections() { + auto IsEmpty = [](const std::unique_ptr &S) { + return S->getVirtualSize() == 0; + }; + OutputSections.erase( + std::remove_if(OutputSections.begin(), OutputSections.end(), IsEmpty), + OutputSections.end()); +} + +// Visits all sections to assign incremental, non-overlapping RVAs and +// file offsets. +void Writer::assignAddresses() { + SizeOfHeaders = RoundUpToAlignment( + HeaderSize + sizeof(coff_section) * OutputSections.size(), PageSize); + uint64_t RVA = 0x1000; // The first page is kept unmapped. + uint64_t FileOff = SizeOfHeaders; + for (std::unique_ptr &Sec : OutputSections) { + Sec->setRVA(RVA); + Sec->setFileOffset(FileOff); + RVA += RoundUpToAlignment(Sec->getVirtualSize(), PageSize); + FileOff += RoundUpToAlignment(Sec->getRawSize(), FileAlignment); + } + SizeOfImage = SizeOfHeaders + RoundUpToAlignment(RVA - 0x1000, PageSize); + FileSize = SizeOfHeaders + + RoundUpToAlignment(FileOff - SizeOfHeaders, FileAlignment); +} + +void Writer::writeHeader() { + // Write DOS stub + uint8_t *Buf = Buffer->getBufferStart(); + auto *DOS = reinterpret_cast(Buf); + Buf += DOSStubSize; + DOS->Magic[0] = 'M'; + DOS->Magic[1] = 'Z'; + DOS->AddressOfRelocationTable = sizeof(dos_header); + DOS->AddressOfNewExeHeader = DOSStubSize; + + // Write PE magic + memcpy(Buf, PEMagic, sizeof(PEMagic)); + Buf += sizeof(PEMagic); + + // Write COFF header + coff_file_header *COFF = reinterpret_cast(Buf); + Buf += sizeof(coff_file_header); + COFF->Machine = IMAGE_FILE_MACHINE_AMD64; + COFF->NumberOfSections = OutputSections.size(); + COFF->Characteristics = + (IMAGE_FILE_EXECUTABLE_IMAGE | IMAGE_FILE_RELOCS_STRIPPED | + IMAGE_FILE_LARGE_ADDRESS_AWARE); + COFF->SizeOfOptionalHeader = + sizeof(pe32plus_header) + sizeof(data_directory) * NumberfOfDataDirectory; + + // Write PE header + pe32plus_header *PE = reinterpret_cast(Buf); + Buf += sizeof(pe32plus_header); + PE->Magic = PE32Header::PE32_PLUS; + PE->ImageBase = Config->ImageBase; +
PE->SectionAlignment = SectionAlignment; + PE->FileAlignment = FileAlignment; + PE->MajorOperatingSystemVersion = 6; + PE->MajorSubsystemVersion = 6; + PE->Subsystem = IMAGE_SUBSYSTEM_WINDOWS_CUI; + PE->SizeOfImage = SizeOfImage; + PE->SizeOfHeaders = SizeOfHeaders; + PE->AddressOfEntryPoint = Entry->getRVA(); + PE->SizeOfStackReserve = 1024 * 1024; + PE->SizeOfStackCommit = 4096; + PE->SizeOfHeapReserve = 1024 * 1024; + PE->SizeOfHeapCommit = 4096; + PE->NumberOfRvaAndSize = NumberfOfDataDirectory; + if (OutputSection *Text = findSection(".text")) { + PE->BaseOfCode = Text->getRVA(); + PE->SizeOfCode = Text->getRawSize(); + } + PE->SizeOfInitializedData = getSizeOfInitializedData(); + + // Write data directory + data_directory *DataDirectory = reinterpret_cast(Buf); + Buf += sizeof(data_directory) * NumberfOfDataDirectory; + if (OutputSection *Idata = findSection(".idata")) { + using namespace llvm::COFF; + DataDirectory[IMPORT_TABLE].RelativeVirtualAddress = Idata->getRVA(); + DataDirectory[IMPORT_TABLE].Size = ImportDirectoryTableSize; + DataDirectory[IAT].RelativeVirtualAddress = ImportAddressTable->getRVA(); + DataDirectory[IAT].Size = ImportAddressTableSize; + } + + // Write section table + coff_section *SectionTable = reinterpret_cast(Buf); + int Idx = 0; + for (std::unique_ptr &Out : OutputSections) + SectionTable[Idx++] = Out->getHeader(); +} + +std::error_code Writer::openFile(StringRef Path) { + if (auto EC = FileOutputBuffer::create(Path, FileSize, Buffer, + FileOutputBuffer::F_executable)) + return make_dynamic_error_code(Twine("Failed to open ") + Path + ": " + + EC.message()); + return std::error_code(); +} + +// Writes section contents to a mmap'ed file. +void Writer::writeSections() { + uint8_t *Buf = Buffer->getBufferStart(); + for (std::unique_ptr &Sec : OutputSections) { + // Fill gaps between functions in .text with INT3 instructions + // instead of leaving them as NUL bytes (which can be interpreted as + // ADD instructions).
+ if (Sec->getPermissions() & IMAGE_SCN_CNT_CODE) + memset(Buf + Sec->getFileOff(), 0xCC, Sec->getRawSize()); + for (Chunk *C : Sec->getChunks()) + if (C->hasData()) + memcpy(Buf + C->getFileOff(), C->getData(), C->getSize()); + } +} + +OutputSection *Writer::findSection(StringRef Name) { + for (std::unique_ptr &Sec : OutputSections) + if (Sec->getName() == Name) + return Sec.get(); + return nullptr; +} + +uint32_t Writer::getSizeOfInitializedData() { + uint32_t Res = 0; + for (std::unique_ptr &S : OutputSections) + if (S->getPermissions() & IMAGE_SCN_CNT_INITIALIZED_DATA) + Res += S->getRawSize(); + return Res; +} + +// Returns an existing section or creates a new one if not found. +OutputSection *Writer::createSection(StringRef Name) { + if (auto *Sec = findSection(Name)) + return Sec; + const auto R = IMAGE_SCN_MEM_READ; + const auto W = IMAGE_SCN_MEM_WRITE; + const auto E = IMAGE_SCN_MEM_EXECUTE; + uint32_t Perm = StringSwitch(Name) + .Case(".bss", IMAGE_SCN_CNT_UNINITIALIZED_DATA | R | W) + .Case(".data", IMAGE_SCN_CNT_INITIALIZED_DATA | R | W) + .Case(".idata", IMAGE_SCN_CNT_INITIALIZED_DATA | R) + .Case(".rdata", IMAGE_SCN_CNT_INITIALIZED_DATA | R) + .Case(".text", IMAGE_SCN_CNT_CODE | R | E) + .Default(0); + if (!Perm) + llvm_unreachable("unknown section name"); + auto Sec = new OutputSection(Name, OutputSections.size()); + Sec->addPermissions(Perm); + OutputSections.push_back(std::unique_ptr(Sec)); + return Sec; +} + +void Writer::applyRelocations() { + uint8_t *Buf = Buffer->getBufferStart(); + for (std::unique_ptr &Sec : OutputSections) + for (Chunk *C : Sec->getChunks()) + C->applyRelocations(Buf); +} + +std::error_code Writer::write(StringRef OutputPath) { + markLive(); + createSections(); + createImportTables(); + assignAddresses(); + removeEmptySections(); + if (auto EC = openFile(OutputPath)) + return EC; + writeHeader(); + writeSections(); + applyRelocations(); + if (auto EC = Buffer->commit()) + return EC; + return std::error_code(); +} + +} //
namespace coff +} // namespace lld Index: include/lld/Driver/Driver.h =================================================================== --- include/lld/Driver/Driver.h +++ include/lld/Driver/Driver.h @@ -140,6 +140,11 @@ WinLinkDriver() = delete; }; +/// Driver for Windows 'link.exe' command line options +namespace coff { +bool link(int argc, const char *argv[]); +} + /// Driver for lld unit tests class CoreDriver : public Driver { public: Index: lib/Driver/UniversalDriver.cpp =================================================================== --- lib/Driver/UniversalDriver.cpp +++ lib/Driver/UniversalDriver.cpp @@ -70,6 +70,7 @@ invalid, gnu_ld, // -flavor gnu win_link, // -flavor link + win_link2, // -flavor link2 darwin_ld, // -flavor darwin core // -flavor core OR -core }; @@ -86,6 +87,8 @@ .Case("gnu", Flavor::gnu_ld) .Case("link", Flavor::win_link) .Case("lld-link", Flavor::win_link) + .Case("link2", Flavor::win_link2) + .Case("lld-link2", Flavor::win_link2) .Case("darwin", Flavor::darwin_ld) .Case("core", Flavor::core) .Case("ld", Flavor::gnu_ld) @@ -205,6 +208,8 @@ return DarwinLdDriver::linkMachO(args.size(), args.data(), diagnostics); case Flavor::win_link: return WinLinkDriver::linkPECOFF(args.size(), args.data(), diagnostics); + case Flavor::win_link2: + return coff::link(args.size(), args.data()); case Flavor::core: return CoreDriver::link(args.size(), args.data(), diagnostics); case Flavor::invalid: Index: test/COFF/Inputs/hello64.asm =================================================================== --- /dev/null +++ test/COFF/Inputs/hello64.asm @@ -0,0 +1,22 @@ +;; ml64 hello64.asm /link /subsystem:windows /defaultlib:kernel32 \ +;; /defaultlib:user32 /out:hello64.exe /entry:main + +extern ExitProcess : PROC +extern MessageBoxA : PROC + +.data + caption db 'Hello', 0 + message db 'Hello World!', 0 + +.code +main PROC + sub rsp,28h + mov rcx, 0 + lea rdx, message + lea r8, caption + mov r9d, 0 + call MessageBoxA + mov ecx, 0 + call ExitProcess 
+main ENDP +END Index: test/COFF/driver.test =================================================================== --- /dev/null +++ test/COFF/driver.test @@ -0,0 +1,5 @@ +# RUN: not lld -flavor link2 nosuchfile.obj >& %t.log +# RUN: FileCheck -check-prefix=MISSING %s < %t.log +MISSING: nosuchfile.obj: no such file or directory + +# RUN: lld -flavor link2 /entry:main /out:%t.exe %p/Inputs/ret42.obj Index: test/COFF/imports.test =================================================================== --- /dev/null +++ test/COFF/imports.test @@ -0,0 +1,27 @@ +# Verify that lld can handle .lib files and emit .idata sections. +# +# RUN: lld -flavor link2 /out:%t.exe /entry:main %p/Inputs/hello64.obj \ +# RUN: %p/Inputs/std64.lib +# RUN: llvm-objdump -d %t.exe | FileCheck -check-prefix=TEXT %s +# RUN: llvm-readobj -coff-imports %t.exe | FileCheck -check-prefix=IMPORT %s + +TEXT: Disassembly of section .text: +TEXT-NEXT: .text: +TEXT-NEXT: subq $40, %rsp +TEXT-NEXT: movq $0, %rcx +TEXT-NEXT: leaq -4108(%rip), %rdx +TEXT-NEXT: leaq -4121(%rip), %r8 +TEXT-NEXT: movl $0, %r9d +TEXT-NEXT: callq 16 +TEXT-NEXT: movl $0, %ecx +TEXT-NEXT: callq 0 +TEXT-NEXT: jmpq *4108(%rip) +TEXT-NEXT: jmpq *4110(%rip) + +IMPORT: Import { +IMPORT-NEXT: Name: std64.dll +IMPORT-NEXT: ImportLookupTableRVA: 0x3028 +IMPORT-NEXT: ImportAddressTableRVA: 0x3040 +IMPORT-NEXT: Symbol: ExitProcess (0) +IMPORT-NEXT: Symbol: MessageBoxA (0) +IMPORT-NEXT: } Index: tools/lld/CMakeLists.txt =================================================================== --- tools/lld/CMakeLists.txt +++ tools/lld/CMakeLists.txt @@ -4,6 +4,7 @@ target_link_libraries(lld lldDriver + lldCOFF LLVMSupport ) Index: unittests/DriverTests/CMakeLists.txt =================================================================== --- unittests/DriverTests/CMakeLists.txt +++ unittests/DriverTests/CMakeLists.txt @@ -8,6 +8,7 @@ target_link_libraries(DriverTests lldDriver + lldCOFF lldCore lldPECOFF lldMachO