Index: ELF/LinkerScript.cpp =================================================================== --- ELF/LinkerScript.cpp +++ ELF/LinkerScript.cpp @@ -937,17 +937,20 @@ return 0; } -class elf::ScriptParser : public ScriptParserBase { +class elf::ScriptParser final : public ScriptParserBase { typedef void (ScriptParser::*Handler)(); public: - ScriptParser(StringRef S, bool B) : ScriptParserBase(S), IsUnderSysroot(B) {} + ScriptParser(MemoryBufferRef MB, bool B) : IsUnderSysroot(B) { + addScriptFile(MB.getBufferIdentifier(), MB.getBuffer()); + } void readLinkerScript(); void readVersionScript(); private: void addFile(StringRef Path); + void addScriptFile(StringRef Path, StringRef Data); void readAsNeeded(); void readEntry(); @@ -994,8 +997,15 @@ void readLocal(StringRef VerStr); void readSymbols(std::vector &V); + void setError(const Twine &Msg) override; + ScriptConfiguration &Opt = *ScriptConfig; bool IsUnderSysroot; + + // Stores list of files in array of intervals. The second parameter is file + // data, which can be split into two smaller StringRef's in case there is + // an INCLUDE directive. + std::vector> Files; }; void ScriptParser::readVersionScript() { @@ -1096,6 +1106,80 @@ } } +// Returns true if string 'Bigger' contains string 'Shorter'. +static bool containsString(StringRef Bigger, StringRef Shorter) { + const char *BiggerEnd = Bigger.data() + Bigger.size(); + const char *ShorterEnd = Shorter.data() + Shorter.size(); + + return Bigger.data() <= Shorter.data() && BiggerEnd >= ShorterEnd; +} + +void ScriptParser::setError(const Twine &Msg) { + if (Error) + return; + + assert(!Files.empty()); + Error = true; + if (!Pos) { + error(Files.front().first + " (1): " + Msg); + return; + } + + // Find piece of input data which contains token which has + // raised an error. When found report file name and line + // number. 
+ for (auto &F : Files) + if (containsString(F.second, Tokens[Pos - 1])) { + // If we have valid token we can calculate line and column + // numbers to provide better error diagnostics. + StringRef Line = getTokenLine(F.second, current()); + auto ErrLoc = getTokenLocation(F.second, current()); + const Twine &Location = F.first + " (" + Twine(ErrLoc.first) + "): "; + error(Location + Msg); + error(Location + Line); + error(Location + std::string(ErrLoc.second, ' ') + "^"); + break; + } +} + +// Tokenizes script data and also saves script file name and data for error +// reporting. To get file name and line from token position we store file data +// in array of non-overlapping intervals. Each interval covers specific range +// of tokens corresponding to a single file. When we read INCLUDE directive +// interval array is transformed in the following way: +// +// {a.t, a_Data} - INCLUDE "b.t" -> {a.t, a_Data1} {b.t, b_Data} {a.t, a_Data2} +// +// Data of "a.t" is split into 2 pieces Data1 and Data2. First one covers range +// from the beginning of "a.t" till the end of INCLUDE directive. Second one +// covers range from the first symbol after INCLUDE directive till the end of +// the file +void ScriptParser::addScriptFile(StringRef Path, StringRef Data) { + std::vector V = tokenize(Path, Data); + if (Tokens.empty()) + Tokens = std::move(V); + else + Tokens.insert(Tokens.begin() + Pos, V.begin(), V.end()); + + std::vector> Items = {{Path, Data}}; + auto It = Files.begin(); + for (; It != Files.end(); ++It) { + const char *TokenEnd = Tokens[Pos - 1].data() + Tokens[Pos - 1].size(); + const char *FileBegin = It->second.data(); + const char *FileEnd = FileBegin + It->second.size(); + if (TokenEnd >= FileBegin && TokenEnd <= FileEnd) { + // Split interval into 2 smaller ones, if we have INCLUDE directive. 
+ if (TokenEnd < FileEnd) { + It->second = StringRef(FileBegin, TokenEnd - FileBegin); + Items.emplace_back(It->first, StringRef(TokenEnd, FileEnd - TokenEnd)); + } + ++It; + break; + } + } + Files.insert(It, Items.begin(), Items.end()); +} + void ScriptParser::readAsNeeded() { expect("("); bool Orig = Config->AsNeeded; @@ -1140,8 +1224,7 @@ } std::unique_ptr &MB = *MBOrErr; StringRef S = Saver.save(MB->getMemBufferRef().getBuffer()); - std::vector V = tokenize(S); - Tokens.insert(Tokens.begin() + Pos, V.begin(), V.end()); + addScriptFile(unquote(Tok), S); } void ScriptParser::readOutput() { @@ -1877,11 +1960,11 @@ void elf::readLinkerScript(MemoryBufferRef MB) { StringRef Path = MB.getBufferIdentifier(); - ScriptParser(MB.getBuffer(), isUnderSysroot(Path)).readLinkerScript(); + ScriptParser(MB, isUnderSysroot(Path)).readLinkerScript(); } void elf::readVersionScript(MemoryBufferRef MB) { - ScriptParser(MB.getBuffer(), false).readVersionScript(); + ScriptParser(MB, false).readVersionScript(); } template class elf::LinkerScript; Index: ELF/ScriptParser.h =================================================================== --- ELF/ScriptParser.h +++ ELF/ScriptParser.h @@ -18,24 +18,31 @@ namespace lld { namespace elf { +// Gets line containing a token. +StringRef getTokenLine(StringRef Data, StringRef Token); + +// Gets token line and column as std::pair. 
+std::pair getTokenLocation(StringRef Data, StringRef Tok); + class ScriptParserBase { public: - explicit ScriptParserBase(StringRef S) : Input(S), Tokens(tokenize(S)) {} + ScriptParserBase() = default; + explicit ScriptParserBase(StringRef S); protected: - void setError(const Twine &Msg); - static std::vector tokenize(StringRef S); + ~ScriptParserBase() = default; + + virtual void setError(const Twine &Msg); + static std::vector tokenize(StringRef Filename, StringRef S); static StringRef skipSpace(StringRef S); bool atEOF(); StringRef next(); StringRef peek(); + StringRef current(); void skip(); bool consume(StringRef Tok); void expect(StringRef Expect); - size_t getPos(); - void printErrorPos(); - StringRef Input; std::vector Tokens; size_t Pos = 0; Index: ELF/ScriptParser.cpp =================================================================== --- ELF/ScriptParser.cpp +++ ELF/ScriptParser.cpp @@ -20,8 +20,9 @@ using namespace lld; using namespace lld::elf; -// Returns the line that the character S[Pos] is in. -static StringRef getLine(StringRef S, size_t Pos) { +// Returns the line that the token Tok is in. +StringRef elf::getTokenLine(StringRef S, StringRef Tok) { + size_t Pos = Tok.data() - S.data(); size_t Begin = S.rfind('\n', Pos); size_t End = S.find('\n', Pos); Begin = (Begin == StringRef::npos) ? 0 : Begin + 1; @@ -31,30 +32,44 @@ return S.substr(Begin, End - Begin).rtrim(); } -void ScriptParserBase::printErrorPos() { - StringRef Tok = Tokens[Pos == 0 ? 0 : Pos - 1]; - StringRef Line = getLine(Input, Tok.data() - Input.data()); +// Returns the current line number. 
+static size_t getLineNo(StringRef Data, StringRef Tok) { + return StringRef(Data.data(), Tok.data() - Data.data()).count('\n') + 1; +} + +std::pair elf::getTokenLocation(StringRef Data, StringRef Tok) { + StringRef Line = getTokenLine(Data, Tok); + return {getLineNo(Data, Tok), Tok.data() - Line.data()}; +} + +static void printErrorPos(StringRef Input, StringRef Tok) { + StringRef Line = getTokenLine(Input, Tok); size_t Col = Tok.data() - Line.data(); error(Line); error(std::string(Col, ' ') + "^"); } +ScriptParserBase::ScriptParserBase(StringRef S) + : Input(S), Tokens(tokenize(StringRef(), S)) {} + // We don't want to record cascading errors. Keep only the first one. void ScriptParserBase::setError(const Twine &Msg) { if (Error) return; - if (Input.empty() || Tokens.empty()) { + if (Pos == 0) { error(Msg); } else { - error("line " + Twine(getPos()) + ": " + Msg); - printErrorPos(); + error("line " + Twine(getLineNo(Input, current())) + ": " + Msg); + printErrorPos(Input, current()); } Error = true; } // Split S into linker script tokens. -std::vector ScriptParserBase::tokenize(StringRef S) { +std::vector ScriptParserBase::tokenize(StringRef Filename, + StringRef S) { std::vector Ret; + StringRef Data = S; for (;;) { S = skipSpace(S); if (S.empty()) @@ -67,7 +82,11 @@ if (S.startswith("\"")) { size_t E = S.find("\"", 1); if (E == StringRef::npos) { - error("unclosed quote"); + if (!Filename.empty()) + error(Filename + " (" + Twine(getLineNo(Data, S)) + + "): unclosed quote"); + else + error("unclosed quote"); return {}; } Ret.push_back(S.take_front(E + 1)); @@ -137,6 +156,11 @@ return Tok; } +StringRef ScriptParserBase::current() { + assert(Pos); + return Tokens[Pos - 1]; +} + bool ScriptParserBase::consume(StringRef Tok) { if (peek() == Tok) { skip(); @@ -154,12 +178,3 @@ if (Tok != Expect) setError(Expect + " expected, but got " + Tok); } - -// Returns the current line number. 
-size_t ScriptParserBase::getPos() { - if (Pos == 0) - return 1; - const char *Begin = Input.data(); - const char *Tok = Tokens[Pos - 1].data(); - return StringRef(Begin, Tok - Begin).count('\n') + 1; -} Index: test/ELF/linkerscript/diagnostic.s =================================================================== --- test/ELF/linkerscript/diagnostic.s +++ test/ELF/linkerscript/diagnostic.s @@ -20,7 +20,7 @@ # RUN: echo "comment line 2 */" >> %t.script # RUN: echo ".temp : { *(.temp) } }" >> %t.script # RUN: not ld.lld -shared %t -o %t1 --script %t.script 2>&1 | FileCheck -check-prefix=ERR1 %s -# ERR1: line 2: +# ERR1: {{.*}} (2): ## Change ":" to "+" at line 3 now, check correct error line number: # RUN: echo "SECTIONS {" > %t.script @@ -30,7 +30,7 @@ # RUN: echo "comment line 2 */" >> %t.script # RUN: echo ".temp : { *(.temp) } }" >> %t.script # RUN: not ld.lld -shared %t -o %t1 --script %t.script 2>&1 | FileCheck -check-prefix=ERR2 %s -# ERR2: line 3: +# ERR2: {{.*}} (3): ## Change ":" to "+" at line 6, after multiline comment, ## check correct error line number: @@ -41,7 +41,7 @@ # RUN: echo "comment line 2 */" >> %t.script # RUN: echo ".temp + { *(.temp) } }" >> %t.script # RUN: not ld.lld -shared %t -o %t1 --script %t.script 2>&1 | FileCheck -check-prefix=ERR5 %s -# ERR5: line 6: +# ERR5: {{.*}} (6): ## Check that text of lines and pointer to 'bad' token are working ok. # RUN: echo "UNKNOWN_TAG {" > %t.script @@ -50,9 +50,9 @@ # RUN: echo ".temp : { *(.temp) } }" >> %t.script # RUN: not ld.lld -shared %t -o %t1 --script %t.script 2>&1 | \ # RUN: FileCheck -check-prefix=ERR6 -strict-whitespace %s -# ERR6: error: line 1: -# ERR6-NEXT: error: UNKNOWN_TAG { -# ERR6-NEXT: error: ^ +# ERR6: error: {{.*}} (1): +# ERR6-NEXT: error: {{.*}} (1): UNKNOWN_TAG { +# ERR6-NEXT: error: {{.*}} (1): ^ ## One more check that text of lines and pointer to 'bad' token are working ok. 
# RUN: echo "SECTIONS {" > %t.script @@ -61,6 +61,33 @@ # RUN: echo "boom .temp : { *(.temp) } }" >> %t.script # RUN: not ld.lld -shared %t -o %t1 --script %t.script 2>&1 | \ # RUN: FileCheck -check-prefix=ERR7 -strict-whitespace %s -# ERR7: error: line 4: malformed number: .temp -# ERR7-NEXT: error: boom .temp : { *(.temp) } } -# ERR7-NEXT: error: ^ +# ERR7: error: {{.*}} (4): malformed number: .temp +# ERR7-NEXT: error: {{.*}} (4): boom .temp : { *(.temp) } } +# ERR7-NEXT: error: {{.*}} (4): ^ + +## Check tokenize() error +# RUN: echo "SECTIONS {}" > %t.script +# RUN: echo "\"" >> %t.script +# RUN: not ld.lld -shared %t -o %t1 --script %t.script 2>&1 | \ +# RUN: FileCheck -check-prefix=ERR8 -strict-whitespace %s +# ERR8: {{.*}} (2): unclosed quote + +## Check tokenize() error in included script file +# RUN: echo "SECTIONS {}" > %t.script.inc +# RUN: echo "\"" >> %t.script.inc +# RUN: echo "INCLUDE \"%t.script.inc\"" > %t.script +# RUN: not ld.lld -shared %t -o %t1 --script %t.script 2>&1 | \ +# RUN: FileCheck -check-prefix=ERR9 -strict-whitespace %s +# ERR9: {{.*}}.inc (2): unclosed quote + +## Check error reporting correctness for included files. +# RUN: echo "SECTIONS {" > %t.script.inc +# RUN: echo ".text : { *(.text) }" >> %t.script.inc +# RUN: echo ".keep : { *(.keep) }" >> %t.script.inc +# RUN: echo "boom .temp : { *(.temp) } }" >> %t.script.inc +# RUN: echo "INCLUDE \"%t.script.inc\"" > %t.script +# RUN: not ld.lld -shared %t -o %t1 --script %t.script 2>&1 | \ +# RUN: FileCheck -check-prefix=ERR10 -strict-whitespace %s +# ERR10: error: {{.*}}.inc (4): malformed number: .temp +# ERR10-NEXT: error: {{.*}}.inc (4): boom .temp : { *(.temp) } } +# ERR10-NEXT: error: {{.*}}.inc (4): ^