Index: lld/COFF/InputFiles.h =================================================================== --- lld/COFF/InputFiles.h +++ lld/COFF/InputFiles.h @@ -156,6 +156,8 @@ llvm::Optional PCHSignature; private: + const coff_section* getSection(uint32_t I); + void initializeChunks(); void initializeSymbols(); Index: lld/COFF/InputFiles.cpp =================================================================== --- lld/COFF/InputFiles.cpp +++ lld/COFF/InputFiles.cpp @@ -126,6 +126,13 @@ initializeSymbols(); } +const coff_section* ObjFile::getSection(uint32_t I) { + const coff_section *Sec; + if (auto EC = COFFObj->getSection(I, Sec)) + fatal("getSection failed: #" + Twine(I) + ": " + EC.message()); + return Sec; +} + // We set SectionChunk pointers in the SparseChunks vector to this value // temporarily to mark comdat sections as having an unknown resolution. As we // walk the object file's symbol table, once we visit either a leader symbol or @@ -139,10 +146,7 @@ Chunks.reserve(NumSections); SparseChunks.resize(NumSections + 1); for (uint32_t I = 1; I < NumSections + 1; ++I) { - const coff_section *Sec; - if (auto EC = COFFObj->getSection(I, Sec)) - fatal("getSection failed: #" + Twine(I) + ": " + EC.message()); - + const coff_section *Sec = getSection(I); if (Sec->Characteristics & IMAGE_SCN_LNK_COMDAT) SparseChunks[I] = PendingComdat; else @@ -153,9 +157,7 @@ SectionChunk *ObjFile::readSection(uint32_t SectionNumber, const coff_aux_section_definition *Def, StringRef LeaderName) { - const coff_section *Sec; - if (auto EC = COFFObj->getSection(SectionNumber, Sec)) - fatal("getSection failed: #" + Twine(SectionNumber) + ": " + EC.message()); + const coff_section *Sec = getSection(SectionNumber); StringRef Name; if (auto EC = COFFObj->getSectionName(Sec, Name)) @@ -231,8 +233,7 @@ StringRef Name, ParentName; COFFObj->getSymbolName(Sym, Name); - const coff_section *ParentSec; - COFFObj->getSection(ParentIndex, ParentSec); + const coff_section *ParentSec = getSection(ParentIndex); COFFObj->getSectionName(ParentSec, ParentName); error(toString(this) + ": associative comdat " + Name + " (sec " + Twine(SectionNumber) + ") has invalid reference to section " + @@ -429,10 +430,21 @@ fatal(toString(this) + ": " + GetName() + " should not refer to non-existent section " + Twine(SectionNumber)); - // Handle comdat leader symbols. + // Comdat handling. + // A comdat symbol consists of two symbol table entries. + // The first symbol entry has the name of the section (e.g. .text), fixed + // values for the other fields, and one auxilliary record. + // The second symbol entry has the name of the comdat symbol, called the + // "comdat leader". + // When this function is called for the first symbol entry of a comdat, + // it sets ComdatDefs and returns None, and when it's called for the second + // symbol entry it reads ComdatDefs and then sets it back to nullptr. + + // Handle comdat leader. if (const coff_aux_section_definition *Def = ComdatDefs[SectionNumber]) { ComdatDefs[SectionNumber] = nullptr; - Symbol *Leader; + DefinedRegular *Leader; + if (Sym.isExternal()) { std::tie(Leader, Prevailing) = Symtab->addComdat(this, GetName(), Sym.getGeneric()); @@ -442,10 +454,111 @@ Prevailing = true; } + if (Def->Selection < (int)IMAGE_COMDAT_SELECT_NODUPLICATES || + // Intentionally ends at IMAGE_COMDAT_SELECT_LARGEST: link.exe + // doesn't understand IMAGE_COMDAT_SELECT_NEWEST either. + Def->Selection > (int)IMAGE_COMDAT_SELECT_LARGEST) { + fatal("unknown comdat type " + std::to_string((int)Def->Selection) + + " for " + GetName() + " in " + toString(this)); + } + COMDATType Selection = (COMDATType)Def->Selection; + + if (!Prevailing && Leader->isCOMDAT()) { + // There's already an existing comdat for this symbol: `Leader`. + // Use the comdats's selection field to determine if the new + // symbol in `Sym` should be discarded, produce a duplicate symbol + // error, etc. + + SectionChunk *LeaderChunk = nullptr; + COMDATType LeaderSelection = IMAGE_COMDAT_SELECT_ANY; + + if (Leader->Data) { + LeaderChunk = Leader->getChunk(); + LeaderSelection = LeaderChunk->Selection; + } else { + // FIXME: comdats from LTO files don't know their selection; treat them + // as "any". + Selection = LeaderSelection; + } + + // Requiring selections to match exactly is a bit more strict than + // link.exe which allows merging "any" and "largest" if "any" is the first + // symbol the linker sees, and it allows merging "largest" with everything + // (!) if "largest" is the first symbol the linker sees. Making this + // symmetric independent of which selection is seen first seems better + // though, and if we can get away with not allowing merging "any" and + // "largest" that keeps things more regular too. + // (ModuleLinker::getComdatResult() also does comdat type merging in a + // different way and it's also a bit more permissive.) + if (Selection != LeaderSelection) { + std::string Msg = ("conflicting comdat type for " + toString(*Leader) + + ": " + Twine((int)LeaderSelection) + " in " + + toString(Leader->getFile()) + " and " + + Twine((int)Selection) + " in " + toString(this)) + .str(); + if (Config->ForceMultiple) + warn(Msg); + else + error(Msg); + } + + switch (Selection) { + case IMAGE_COMDAT_SELECT_NODUPLICATES: + Symtab->reportDuplicate(Leader, this); + break; + + case IMAGE_COMDAT_SELECT_ANY: + // Nothing to do. + break; + + case IMAGE_COMDAT_SELECT_SAME_SIZE: + if (LeaderChunk->getSize() != getSection(SectionNumber)->SizeOfRawData) + Symtab->reportDuplicate(Leader, this); + break; + + case IMAGE_COMDAT_SELECT_EXACT_MATCH: { + SectionChunk NewChunk(this, getSection(SectionNumber)); + // link.exe only compares section contents here and doesn't complain + // if the two comdat sections have e.g. different alignment. + // Match that. + if (LeaderChunk->getContents() != NewChunk.getContents()) + Symtab->reportDuplicate(Leader, this); + break; + } + + case IMAGE_COMDAT_SELECT_ASSOCIATIVE: + // createDefined() is never called for IMAGE_COMDAT_SELECT_ASSOCIATIVE. + // (This means lld-link doesn't produce duplicate symbol errors for + // associative comdats while link.exe does, but associate comdats + // are never extern in practice.) + llvm_unreachable("createDefined not called for associative comdats"); + + case IMAGE_COMDAT_SELECT_LARGEST: + if (LeaderChunk->getSize() < getSection(SectionNumber)->SizeOfRawData) { + // Replace the existing comdat symbol with the new one. + // FIXME: This is incorrect: With /opt:noref, the previous sections + // make it into the final executable as well. Correct handling would + // be to undo reading of the whole old section that's being replaced, + // or doing one pass that determines what the final largest comdat + // is for all IMAGE_COMDAT_SELECT_LARGEST comdats and then reading + // only the largest one. + replaceSymbol( + Leader, this, GetName(), /*IsCOMDAT*/ true, + /*IsExternal*/ true, Sym.getGeneric(), nullptr); + Prevailing = true; + } + break; + + case IMAGE_COMDAT_SELECT_NEWEST: + llvm_unreachable("should have been rejected earlier"); + } + } + if (Prevailing) { SectionChunk *C = readSection(SectionNumber, Def, GetName()); SparseChunks[SectionNumber] = C; C->Sym = cast(Leader); + C->Selection = Selection; cast(Leader)->Data = &C->Repl; } else { SparseChunks[SectionNumber] = nullptr; @@ -534,6 +647,8 @@ MB.getBuffer(), Saver.save(ParentName + MB.getBufferIdentifier())))); std::vector> Comdat(Obj->getComdatTable().size()); for (size_t I = 0; I != Obj->getComdatTable().size(); ++I) + // FIXME: lto::InputFile doesn't keep enough data to do correct comdat + // selection handling. Comdat[I] = Symtab->addComdat(this, Saver.save(Obj->getComdatTable()[I])); for (const lto::InputFile::Symbol &ObjSym : Obj->symbols()) { StringRef SymName = Saver.save(ObjSym.getName()); Index: lld/COFF/SymbolTable.h =================================================================== --- lld/COFF/SymbolTable.h +++ lld/COFF/SymbolTable.h @@ -27,6 +27,7 @@ class CommonChunk; class Defined; class DefinedAbsolute; +class DefinedRegular; class DefinedRelative; class Lazy; class SectionChunk; @@ -88,7 +89,7 @@ Symbol *addRegular(InputFile *F, StringRef N, const llvm::object::coff_symbol_generic *S = nullptr, SectionChunk *C = nullptr); - std::pair + std::pair addComdat(InputFile *F, StringRef N, const llvm::object::coff_symbol_generic *S = nullptr); Symbol *addCommon(InputFile *F, StringRef N, uint64_t Size, Index: lld/COFF/SymbolTable.cpp =================================================================== --- lld/COFF/SymbolTable.cpp +++ lld/COFF/SymbolTable.cpp @@ -399,7 +399,7 @@ return S; } -std::pair +std::pair SymbolTable::addComdat(InputFile *F, StringRef N, const coff_symbol_generic *Sym) { Symbol *S; @@ -408,11 +408,12 @@ if (WasInserted || !isa(S)) { replaceSymbol(S, F, N, /*IsCOMDAT*/ true, /*IsExternal*/ true, Sym, nullptr); - return {S, true}; + return {cast(S), true}; } - if (!cast(S)->isCOMDAT()) + auto *ExistingSymbol = cast(S); + if (!ExistingSymbol->isCOMDAT()) reportDuplicate(S, F); - return {S, false}; + return {ExistingSymbol, false}; } Symbol *SymbolTable::addCommon(InputFile *F, StringRef N, uint64_t Size, Index: lld/test/COFF/comdat-selection-associative-largest.s =================================================================== --- /dev/null +++ lld/test/COFF/comdat-selection-associative-largest.s @@ -0,0 +1,45 @@ +# REQUIRES: x86 + +# Tests handling of several comdats with "largest" selection type that each +# has an associative comdat. + +# Create obj files. +# RUN: sed -e s/TYPE/.byte/ -e s/SIZE/1/ %s | llvm-mc -triple x86_64-pc-win32 -filetype=obj -o %t.1.obj +# RUN: sed -e s/TYPE/.short/ -e s/SIZE/2/ %s | llvm-mc -triple x86_64-pc-win32 -filetype=obj -o %t.2.obj +# RUN: sed -e s/TYPE/.long/ -e s/SIZE/4/ %s | llvm-mc -triple x86_64-pc-win32 -filetype=obj -o %t.4.obj + + .section .text$ac, "", associative, symbol +assocsym: + .long SIZE + + .section .text$nm, "", largest, symbol + .globl symbol +symbol: + TYPE SIZE + +# Pass the obj files in different orders and check that only the associative +# comdat of the largest obj file makes it into the output, independent of +# the order of the obj files on the command line. + +# FIXME: Make these pass when /opt:noref is passed. + +# RUN: lld-link /include:symbol /dll /noentry /nodefaultlib %t.1.obj %t.2.obj %t.4.obj /out:%t.exe +# RUN: llvm-objdump -s %t.exe | FileCheck --check-prefix=ALL124 %s +# ALL124: Contents of section .text: +# ALL124: 180001000 04000000 04000000 .... + +# RUN: lld-link /include:symbol /dll /noentry /nodefaultlib %t.4.obj %t.2.obj %t.1.obj /out:%t.exe +# RUN: llvm-objdump -s %t.exe | FileCheck --check-prefix=ALL421 %s +# ALL421: Contents of section .text: +# ALL421: 180001000 04000000 04000000 .... + +# RUN: lld-link /include:symbol /dll /noentry /nodefaultlib %t.2.obj %t.4.obj %t.1.obj /out:%t.exe +# RUN: llvm-objdump -s %t.exe | FileCheck --check-prefix=ALL241 %s +# ALL241: Contents of section .text: +# ALL241: 180001000 04000000 04000000 .... + +# RUN: lld-link /include:symbol /dll /noentry /nodefaultlib %t.2.obj %t.1.obj /out:%t.exe +# RUN: llvm-objdump -s %t.exe | FileCheck --check-prefix=JUST21 %s +# JUST21: Contents of section .text: +# JUST21: 180001000 02000000 0200 .... + Index: lld/test/COFF/comdat-selection.s =================================================================== --- /dev/null +++ lld/test/COFF/comdat-selection.s @@ -0,0 +1,87 @@ +# REQUIRES: x86 + +# Tests handling of the comdat selection type. +# (Except associative which is tested in associative-comdat.s and +# comdat-selection-associate-largest.s instead.) + +# Create obj files with each selection type. +# RUN: sed -e s/SEL/discard/ %s | llvm-mc -triple x86_64-pc-win32 -filetype=obj -o %t.discard.obj +# RUN: sed -e s/SEL/one_only/ %s | llvm-mc -triple x86_64-pc-win32 -filetype=obj -o %t.one_only.obj +# RUN: sed -e s/SEL/same_size/ %s | llvm-mc -triple x86_64-pc-win32 -filetype=obj -o %t.same_size.obj +# RUN: sed -e s/SEL/same_size/ -e s/.long/.short/ %s | llvm-mc -triple x86_64-pc-win32 -filetype=obj -o %t.same_size.short.obj +# RUN: sed -e s/SEL/same_contents/ %s | llvm-mc -triple x86_64-pc-win32 -filetype=obj -o %t.same_contents.obj +# RUN: sed -e s/SEL/same_contents/ -e s/.long/.short/ %s | llvm-mc -triple x86_64-pc-win32 -filetype=obj -o %t.same_contents.short.obj +# RUN: sed -e s/SEL/same_contents/ -e s/1/2/ %s | llvm-mc -triple x86_64-pc-win32 -filetype=obj -o %t.same_contents.2.obj +# RUN: sed -e s/SEL/largest/ %s | llvm-mc -triple x86_64-pc-win32 -filetype=obj -o %t.largest.obj +# RUN: sed -e s/SEL/largest/ -e s/.long/.short/ -e s/1/2/ %s | llvm-mc -triple x86_64-pc-win32 -filetype=obj -o %t.largest.short.2.obj +# RUN: sed -e s/SEL/newest/ %s | llvm-mc -triple x86_64-pc-win32 -filetype=obj -o %t.newest.obj + + .section .text$nm, "", SEL, symbol + .globl symbol +symbol: + .long 1 + +# First, pass each selection type twice. All should link fine except for +# one_only which should report a duplicate symbol error and newest which +# link.exe (and hence lld-link) doesn't understand. + +# RUN: cp %t.discard.obj %t.obj && lld-link /dll /noentry /nodefaultlib %t.discard.obj %t.obj +# RUN: cp %t.one_only.obj %t.obj && not lld-link /dll /noentry /nodefaultlib %t.one_only.obj %t.obj 2>&1 | FileCheck --check-prefix=ONEONE %s +# ONEONE: lld-link: error: duplicate symbol: symbol +# RUN: cp %t.same_size.obj %t.obj && lld-link /dll /noentry /nodefaultlib %t.same_size.obj %t.obj +# RUN: cp %t.same_contents.obj %t.obj && lld-link /dll /noentry /nodefaultlib %t.same_contents.obj %t.obj +# RUN: cp %t.largest.obj %t.obj && lld-link /dll /noentry /nodefaultlib %t.largest.obj %t.obj +# RUN: cp %t.newest.obj %t.obj && not lld-link /dll /noentry /nodefaultlib %t.newest.obj %t.obj 2>&1 | FileCheck --check-prefix=NEWNEW %s +# NEWNEW: lld-link: error: unknown comdat type 7 for symbol + +# /force doesn't affect errors about unknown comdat types. +# RUN: cp %t.newest.obj %t.obj && not lld-link /force /dll /noentry /nodefaultlib %t.newest.obj %t.obj 2>&1 | FileCheck --check-prefix=NEWNEWFORCE %s +# NEWNEWFORCE: lld-link: error: unknown comdat type 7 for symbol + +# Check that same_size, same_contents, largest do what they're supposed to. + +# Check that the "same_size" selection produces an error if passed two symbols +# with different size. +# RUN: not lld-link /dll /noentry /nodefaultlib %t.same_size.obj %t.same_size.short.obj 2>&1 | FileCheck --check-prefix=SAMESIZEDUPE %s +# SAMESIZEDUPE: lld-link: error: duplicate symbol: symbol + +# Check that the "same_contents" selection produces an error if passed two +# symbols with different contents. +# RUN: not lld-link /dll /noentry /nodefaultlib %t.same_contents.obj %t.same_contents.2.obj 2>&1 | FileCheck --check-prefix=SAMECONTENTSDUPE1 %s +# SAMECONTENTSDUPE1: lld-link: error: duplicate symbol: symbol +# RUN: not lld-link /dll /noentry /nodefaultlib %t.same_contents.obj %t.same_contents.2.obj 2>&1 | FileCheck --check-prefix=SAMECONTENTSDUPE2 %s +# SAMECONTENTSDUPE2: lld-link: error: duplicate symbol: symbol + +# Check that the "largest" selection picks the larger comdat (independent of +# the order the .obj files are passed on the commandline). +# RUN: lld-link /opt:noref /include:symbol /dll /noentry /nodefaultlib %t.largest.obj %t.largest.short.2.obj /out:%t.exe +# RUN: llvm-objdump -s %t.exe | FileCheck --check-prefix=LARGEST1 %s +# LARGEST1: Contents of section .text: +# LARGEST1: 180001000 01000000 .... + +# FIXME: Make this pass when /opt:noref is passed. +# RUN: lld-link /include:symbol /dll /noentry /nodefaultlib %t.largest.short.2.obj %t.largest.obj /out:%t.exe +# RUN: llvm-objdump -s %t.exe | FileCheck --check-prefix=LARGEST2 %s +# LARGEST2: Contents of section .text: +# LARGEST2: 180001000 01000000 .... + + +# Test linking the same symbol with different comdat selection types. +# link.exe generally rejects this, except for "largest" which is allowed to +# combine with everything (https://bugs.llvm.org/show_bug.cgi?id=40094#c7). +# lld-link rejects all comdat selection type mismatches. Spot-test just a few +# combinations. + +# RUN: not lld-link /dll /noentry /nodefaultlib %t.discard.obj %t.one_only.obj 2>&1 | FileCheck --check-prefix=DISCARDONE %s +# DISCARDONE: lld-link: error: conflicting comdat type for symbol: 2 in +# RUN: lld-link /force /dll /noentry /nodefaultlib %t.discard.obj %t.one_only.obj 2>&1 | FileCheck --check-prefix=DISCARDONEFORCE %s +# DISCARDONEFORCE: lld-link: warning: conflicting comdat type for symbol: 2 in + +# RUN: not lld-link /dll /noentry /nodefaultlib %t.same_contents.obj %t.same_size.obj 2>&1 | FileCheck --check-prefix=CONTENTSSIZE %s +# CONTENTSSIZE: lld-link: error: conflicting comdat type for symbol: 4 in + +# These cases are accepted by link.exe but not by lld-link. +# RUN: not lld-link /dll /noentry /nodefaultlib %t.discard.obj %t.largest.obj 2>&1 | FileCheck --check-prefix=DISCARDLARGEST %s +# DISCARDLARGEST: lld-link: error: conflicting comdat type for symbol: 2 in +# RUN: not lld-link /dll /noentry /nodefaultlib %t.largest.obj %t.one_only.obj 2>&1 | FileCheck --check-prefix=LARGESTONE %s +# LARGESTONE: lld-link: error: conflicting comdat type for symbol: 6 in