Index: ELF/LinkerScript.cpp =================================================================== --- ELF/LinkerScript.cpp +++ ELF/LinkerScript.cpp @@ -1230,12 +1230,12 @@ // directory specified with the -L option. if (sys::fs::exists(Tok)) { if (Optional<MemoryBufferRef> MB = readFile(Tok)) - tokenize(*MB); + Sources.push_back({*MB}); return; } if (Optional<std::string> Path = findFromSearchPaths(Tok)) { if (Optional<MemoryBufferRef> MB = readFile(*Path)) - tokenize(*MB); + Sources.push_back({*MB}); return; } setError("cannot open " + Tok); Index: ELF/ScriptParser.h =================================================================== --- ELF/ScriptParser.h +++ ELF/ScriptParser.h @@ -24,7 +24,6 @@ explicit ScriptParserBase(MemoryBufferRef MB); void setError(const Twine &Msg); - void tokenize(MemoryBufferRef MB); static StringRef skipSpace(StringRef S); bool atEOF(); StringRef next(); @@ -34,17 +33,22 @@ void expect(StringRef Expect); std::string getCurrentLocation(); - std::vector<MemoryBufferRef> MBs; - std::vector<StringRef> Tokens; - size_t Pos = 0; bool Error = false; + struct Source { + Source(MemoryBufferRef MB) : MB(MB), Data(skipSpace(MB.getBuffer())) {} + MemoryBufferRef MB; + StringRef Data; + StringRef LastTok; + }; + std::vector<Source> Sources; + private: + StringRef readToken(bool Peek); + StringRef getLine(); size_t getLineNumber(); size_t getColumnNumber(); - - MemoryBufferRef getCurrentMB(); }; } // namespace elf Index: ELF/ScriptParser.cpp =================================================================== --- ELF/ScriptParser.cpp +++ ELF/ScriptParser.cpp @@ -22,9 +22,9 @@ // Returns a whole line containing the current token. StringRef ScriptParserBase::getLine() { - StringRef S = getCurrentMB().getBuffer(); - StringRef Tok = Tokens[Pos - 1]; - + StringRef S = Sources.back().MB.getBuffer(); + StringRef Tok = Sources.back().LastTok; + assert(!Tok.empty()); size_t Pos = S.rfind('\n', Tok.data() - S.data()); if (Pos != StringRef::npos) S = S.substr(Pos + 1); @@ -33,25 +33,27 @@ // Returns 1-based line number of the current token. 
size_t ScriptParserBase::getLineNumber() { - StringRef S = getCurrentMB().getBuffer(); - StringRef Tok = Tokens[Pos - 1]; + StringRef S = Sources.back().MB.getBuffer(); + StringRef Tok = Sources.back().LastTok; + if (Tok.empty()) + return 1; return S.substr(0, Tok.data() - S.data()).count('\n') + 1; } // Returns 0-based column number of the current token. size_t ScriptParserBase::getColumnNumber() { - StringRef Tok = Tokens[Pos - 1]; + StringRef Tok = Sources.back().LastTok; + if (Tok.empty()) + return 0; return Tok.data() - getLine().data(); } std::string ScriptParserBase::getCurrentLocation() { - std::string Filename = getCurrentMB().getBufferIdentifier(); - if (!Pos) - return Filename; + std::string Filename = Sources.back().MB.getBufferIdentifier(); return (Filename + ":" + Twine(getLineNumber())).str(); } -ScriptParserBase::ScriptParserBase(MemoryBufferRef MB) { tokenize(MB); } +ScriptParserBase::ScriptParserBase(MemoryBufferRef MB) : Sources({{MB}}) {} // We don't want to record cascading errors. Keep only the first one. void ScriptParserBase::setError(const Twine &Msg) { @@ -59,62 +61,56 @@ return; Error = true; - if (!Pos) { - error(getCurrentLocation() + ": " + Msg); - return; - } - std::string S = getCurrentLocation() + ": "; error(S + Msg); error(S + getLine()); error(S + std::string(getColumnNumber(), ' ') + "^"); } -// Split S into linker script tokens. -void ScriptParserBase::tokenize(MemoryBufferRef MB) { - std::vector<StringRef> Vec; - MBs.push_back(MB); - StringRef S = MB.getBuffer(); - StringRef Begin = S; +StringRef ScriptParserBase::readToken(bool Peek) { + if (atEOF()) { + setError("unexpected EOF"); + return ""; + } - for (;;) { - S = skipSpace(S); - if (S.empty()) - break; - - // Quoted token. Note that double-quote characters are parts of a token - // because, in a glob match context, only unquoted tokens are interpreted - // as glob patterns. Double-quoted tokens are literal patterns in that - // context. 
- if (S.startswith("\"")) { - size_t E = S.find("\"", 1); - if (E == StringRef::npos) { - StringRef Filename = MB.getBufferIdentifier(); - size_t Lineno = Begin.substr(0, S.data() - Begin.data()).count('\n'); - error(Filename + ":" + Twine(Lineno + 1) + ": unclosed quote"); - return; - } + StringRef &S = Sources.back().Data; + if (S.empty()) { + Sources.pop_back(); + return readToken(Peek); + } - Vec.push_back(S.take_front(E + 1)); - S = S.substr(E + 1); - continue; + // Quoted token. Note that double-quote characters are parts of a token + // because, in a glob match context, only unquoted tokens are interpreted + // as glob patterns. Double-quoted tokens are literal patterns in that + // context. + if (S.startswith("\"")) { + size_t E = S.find("\"", 1); + if (E == StringRef::npos) { + Sources.back().LastTok = S; + setError("unclosed quote"); + return ""; } - // Unquoted token. This is more relaxed than tokens in C-like language, - // so that you can write "file-name.cpp" as one bare token, for example. - size_t Pos = S.find_first_not_of( - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" - "0123456789_.$/\\~=+[]*?-!<>^"); - - // A character that cannot start a word (which is usually a - // punctuation) forms a single character token. - if (Pos == 0) - Pos = 1; - Vec.push_back(S.substr(0, Pos)); - S = S.substr(Pos); + StringRef Tok = S.take_front(E + 1); + if (!Peek) + S = skipSpace(S.substr(E + 1)); + return Tok; } - Tokens.insert(Tokens.begin() + Pos, Vec.begin(), Vec.end()); + // Unquoted token. This is more relaxed than tokens in C-like language, + // so that you can write "file-name.cpp" as one bare token, for example. + size_t Pos = + S.find_first_not_of("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "0123456789_.$/\\~=+[]*?-!<>^"); + + // A character that cannot start a word (which is usually a + // punctuation) forms a single character token. 
+ if (Pos == 0) + Pos = 1; + StringRef Tok = S.substr(0, Pos); + if (!Peek) + S = skipSpace(S.substr(Pos)); + return Tok; } // Skip leading whitespace characters or comments. @@ -144,26 +140,24 @@ } // An erroneous token is handled as if it were the last token before EOF. -bool ScriptParserBase::atEOF() { return Error || Tokens.size() == Pos; } - -StringRef ScriptParserBase::next() { - if (Error) - return ""; - if (atEOF()) { - setError("unexpected EOF"); - return ""; - } - return Tokens[Pos++]; +bool ScriptParserBase::atEOF() { + if (Error || Sources.empty()) + return true; + for (Source &S : Sources) + if (!S.Data.empty()) + return false; + return true; } -StringRef ScriptParserBase::peek() { - StringRef Tok = next(); - if (Error) - return ""; - --Pos; +StringRef ScriptParserBase::next() { + StringRef Tok = readToken(false); + if (!Tok.empty()) + Sources.back().LastTok = Tok; return Tok; } +StringRef ScriptParserBase::peek() { return readToken(true); } + bool ScriptParserBase::consume(StringRef Tok) { if (peek() == Tok) { skip(); @@ -186,15 +180,3 @@ static bool encloses(StringRef S, StringRef T) { return S.bytes_begin() <= T.bytes_begin() && T.bytes_end() <= S.bytes_end(); } - -MemoryBufferRef ScriptParserBase::getCurrentMB() { - // Find input buffer containing the current token. 
- assert(!MBs.empty()); - if (!Pos) - return MBs[0]; - - for (MemoryBufferRef MB : MBs) - if (encloses(MB.getBuffer(), Tokens[Pos - 1])) - return MB; - llvm_unreachable("getCurrentMB: failed to find a token"); -} Index: test/ELF/linkerscript/diagnostic.s =================================================================== --- test/ELF/linkerscript/diagnostic.s +++ test/ELF/linkerscript/diagnostic.s @@ -70,7 +70,9 @@ # RUN: echo "\"" >> %t.script # RUN: not ld.lld -shared %t -o %t1 --script %t.script 2>&1 | \ # RUN: FileCheck -check-prefix=ERR8 -strict-whitespace %s -# ERR8: {{.*}}.script:2: unclosed quote +# ERR8: {{.*}}.script:2: unclosed quote +# ERR8-NEXT: {{.*}}.script:2: " +# ERR8-NEXT: {{.*}}.script:2: ^ ## Check tokenize() error in included script file # RUN: echo "SECTIONS {}" > %t.script.inc @@ -78,7 +80,9 @@ # RUN: echo "INCLUDE \"%t.script.inc\"" > %t.script # RUN: not ld.lld -shared %t -o %t1 --script %t.script 2>&1 | \ # RUN: FileCheck -check-prefix=ERR9 -strict-whitespace %s -# ERR9: {{.*}}.script.inc:2: unclosed quote +# ERR9: {{.*}}.script.inc:2: unclosed quote +# ERR9-NEXT: {{.*}}.script.inc:2: " +# ERR9-NEXT: {{.*}}.script.inc:2: ^ ## Check error reporting correctness for included files. # RUN: echo "SECTIONS {" > %t.script.inc Index: test/ELF/version-script-err.s =================================================================== --- test/ELF/version-script-err.s +++ test/ELF/version-script-err.s @@ -6,6 +6,7 @@ // RUN: echo "\"" > %terr1.script // RUN: not ld.lld --version-script %terr1.script -shared %t.o -o %t.so 2>&1 | \ -// RUN: FileCheck -check-prefix=ERR1 %s -// ERR1: {{.*}}:1: unclosed quote -// ERR1-NEXT: {{.*}}: unexpected EOF +// RUN: FileCheck -strict-whitespace -check-prefix=ERR1 %s +// ERR1: {{.*}}:1: unclosed quote +// ERR1-NEXT: {{.*}}:1: " +// ERR1-NEXT: {{.*}}:1: ^