Index: ELF/LinkerScript.cpp =================================================================== --- ELF/LinkerScript.cpp +++ ELF/LinkerScript.cpp @@ -1238,12 +1238,12 @@ // directory specified with the -L option. if (sys::fs::exists(Tok)) { if (Optional MB = readFile(Tok)) - tokenize(*MB); + Sources.push_back({*MB}); return; } if (Optional Path = findFromSearchPaths(Tok)) { if (Optional MB = readFile(*Path)) - tokenize(*MB); + Sources.push_back({*MB}); return; } setError("cannot open " + Tok); @@ -1631,9 +1631,24 @@ return new SymbolAssignment(Name, E); } +// Helper for switching tokenizer mode. +struct ModeSwitcher { + ModeSwitcher(ScriptParserBase *P, ScriptParserBase::TokenizerMode New) + : P(P) { + Old = P->Mode; + P->Mode = New; + } + ~ModeSwitcher() { P->Mode = Old; } + ScriptParserBase *P; + ScriptParserBase::TokenizerMode Old; +}; + // This is an operator-precedence parser to parse a linker // script expression. -Expr ScriptParser::readExpr() { return readExpr1(readPrimary(), 0); } +Expr ScriptParser::readExpr() { + ModeSwitcher M(this, MathExprMode); + return readExpr1(readPrimary(), 0); +} static Expr combine(StringRef Op, Expr L, Expr R) { auto IsAbs = [=] { return L.IsAbsolute() && R.IsAbsolute(); }; @@ -1684,6 +1699,8 @@ // This is a part of the operator-precedence parser. This function // assumes that the remaining token stream starts with an operator. Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) { + ModeSwitcher M(this, MathExprMode); + while (!atEOF() && !Error) { // Read an operator and an expression. if (consume("?")) Index: ELF/ScriptParser.h =================================================================== --- ELF/ScriptParser.h +++ ELF/ScriptParser.h @@ -21,10 +21,11 @@ class ScriptParserBase { public: + enum TokenizerMode { RegularMode, MathExprMode }; + explicit ScriptParserBase(MemoryBufferRef MB); void setError(const Twine &Msg); - void tokenize(MemoryBufferRef MB); static StringRef skipSpace(StringRef S); bool atEOF(); StringRef next(); @@ -34,17 +35,25 @@ void expect(StringRef Expect); std::string getCurrentLocation(); - std::vector MBs; - std::vector Tokens; - size_t Pos = 0; bool Error = false; + // Each file parsed represented as a source. + struct Source { + Source(MemoryBufferRef MB) : MB(MB), Data(skipSpace(MB.getBuffer())) {} + MemoryBufferRef MB; // Initial memory buffer. + StringRef Data; // Data left to parse. + StringRef LastTok; // Last parsed token. Used for error reporting. + }; + std::vector Sources; + + TokenizerMode Mode = RegularMode; + private: + StringRef readToken(bool Peek); + StringRef getLine(); size_t getLineNumber(); size_t getColumnNumber(); - - MemoryBufferRef getCurrentMB(); }; } // namespace elf Index: ELF/ScriptParser.cpp =================================================================== --- ELF/ScriptParser.cpp +++ ELF/ScriptParser.cpp @@ -22,9 +22,9 @@ // Returns a whole line containing the current token. StringRef ScriptParserBase::getLine() { - StringRef S = getCurrentMB().getBuffer(); - StringRef Tok = Tokens[Pos - 1]; - + StringRef S = Sources.back().MB.getBuffer(); + StringRef Tok = Sources.back().LastTok; + assert(!Tok.empty()); size_t Pos = S.rfind('\n', Tok.data() - S.data()); if (Pos != StringRef::npos) S = S.substr(Pos + 1); @@ -33,25 +33,27 @@ // Returns 1-based line number of the current token. size_t ScriptParserBase::getLineNumber() { - StringRef S = getCurrentMB().getBuffer(); - StringRef Tok = Tokens[Pos - 1]; + StringRef S = Sources.back().MB.getBuffer(); + StringRef Tok = Sources.back().LastTok; + if (Tok.empty()) + return 1; return S.substr(0, Tok.data() - S.data()).count('\n') + 1; } // Returns 0-based column number of the current token. size_t ScriptParserBase::getColumnNumber() { - StringRef Tok = Tokens[Pos - 1]; + StringRef Tok = Sources.back().LastTok; + if (Tok.empty()) + return 0; return Tok.data() - getLine().data(); } std::string ScriptParserBase::getCurrentLocation() { - std::string Filename = getCurrentMB().getBufferIdentifier(); - if (!Pos) - return Filename; + std::string Filename = Sources.back().MB.getBufferIdentifier(); return (Filename + ":" + Twine(getLineNumber())).str(); } -ScriptParserBase::ScriptParserBase(MemoryBufferRef MB) { tokenize(MB); } +ScriptParserBase::ScriptParserBase(MemoryBufferRef MB) : Sources({{MB}}) {} // We don't want to record cascading errors. Keep only the first one. void ScriptParserBase::setError(const Twine &Msg) { @@ -59,62 +61,63 @@ return; Error = true; - if (!Pos) { - error(getCurrentLocation() + ": " + Msg); - return; - } - std::string S = getCurrentLocation() + ": "; error(S + Msg); error(S + getLine()); error(S + std::string(getColumnNumber(), ' ') + "^"); } -// Split S into linker script tokens. -void ScriptParserBase::tokenize(MemoryBufferRef MB) { - std::vector Vec; - MBs.push_back(MB); - StringRef S = MB.getBuffer(); - StringRef Begin = S; +StringRef ScriptParserBase::readToken(bool Peek) { + if (atEOF()) { + setError("unexpected EOF"); + return ""; + } - for (;;) { - S = skipSpace(S); - if (S.empty()) - break; - - // Quoted token. Note that double-quote characters are parts of a token - // because, in a glob match context, only unquoted tokens are interpreted - // as glob patterns. Double-quoted tokens are literal patterns in that - // context. - if (S.startswith("\"")) { - size_t E = S.find("\"", 1); - if (E == StringRef::npos) { - StringRef Filename = MB.getBufferIdentifier(); - size_t Lineno = Begin.substr(0, S.data() - Begin.data()).count('\n'); - error(Filename + ":" + Twine(Lineno + 1) + ": unclosed quote"); - return; - } + StringRef &S = Sources.back().Data; + if (S.empty()) { + Sources.pop_back(); + return readToken(Peek); + } - Vec.push_back(S.take_front(E + 1)); - S = S.substr(E + 1); - continue; + // Quoted token. Note that double-quote characters are parts of a token + // because, in a glob match context, only unquoted tokens are interpreted + // as glob patterns. Double-quoted tokens are literal patterns in that + // context. + if (S.startswith("\"")) { + size_t E = S.find("\"", 1); + if (E == StringRef::npos) { + Sources.back().LastTok = S; + setError("unclosed quote"); + return ""; } - // Unquoted token. This is more relaxed than tokens in C-like language, - // so that you can write "file-name.cpp" as one bare token, for example. - size_t Pos = S.find_first_not_of( - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" - "0123456789_.$/\\~=+[]*?-!<>^"); - - // A character that cannot start a word (which is usually a - // punctuation) forms a single character token. - if (Pos == 0) - Pos = 1; - Vec.push_back(S.substr(0, Pos)); - S = S.substr(Pos); + StringRef Tok = S.take_front(E + 1); + if (!Peek) + S = skipSpace(S.substr(E + 1)); + return Tok; } - Tokens.insert(Tokens.begin() + Pos, Vec.begin(), Vec.end()); + // Unquoted token. + // This is more relaxed than tokens in C-like language, + // so that you can write "file-name.cpp" as one bare token, for example. + StringRef AllowedChars = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "0123456789_.$\\~=[]?!<>^*/+-"; + // When parsing math expressions, operators are separate tokens. + // So we can parse 1*2 as 3 tokens, while regular parse mode would + // parse it as single token, assuming it is file name mask for example. + if (Mode == MathExprMode) + AllowedChars = AllowedChars.take_front(AllowedChars.rfind('*')); + + size_t Pos = S.find_first_not_of(AllowedChars); + // A character that cannot start a word (which is usually a + // punctuation) forms a single character token. + if (Pos == 0) + Pos = 1; + StringRef Tok = S.substr(0, Pos); + if (!Peek) + S = skipSpace(S.substr(Pos)); + return Tok; } // Skip leading whitespace characters or comments. @@ -144,26 +147,24 @@ } // An erroneous token is handled as if it were the last token before EOF. -bool ScriptParserBase::atEOF() { return Error || Tokens.size() == Pos; } - -StringRef ScriptParserBase::next() { - if (Error) - return ""; - if (atEOF()) { - setError("unexpected EOF"); - return ""; - } - return Tokens[Pos++]; +bool ScriptParserBase::atEOF() { + if (Error || Sources.empty()) + return true; + for (Source &S : Sources) + if (!S.Data.empty()) + return false; + return true; } -StringRef ScriptParserBase::peek() { - StringRef Tok = next(); - if (Error) - return ""; - --Pos; +StringRef ScriptParserBase::next() { + StringRef Tok = readToken(false); + if (!Tok.empty()) + Sources.back().LastTok = Tok; return Tok; } +StringRef ScriptParserBase::peek() { return readToken(true); } + bool ScriptParserBase::consume(StringRef Tok) { if (peek() == Tok) { skip(); @@ -186,15 +187,3 @@ static bool encloses(StringRef S, StringRef T) { return S.bytes_begin() <= T.bytes_begin() && T.bytes_end() <= S.bytes_end(); } - -MemoryBufferRef ScriptParserBase::getCurrentMB() { - // Find input buffer containing the current token. - assert(!MBs.empty()); - if (!Pos) - return MBs[0]; - - for (MemoryBufferRef MB : MBs) - if (encloses(MB.getBuffer(), Tokens[Pos - 1])) - return MB; - llvm_unreachable("getCurrentMB: failed to find a token"); -} Index: test/ELF/linkerscript/diagnostic.s =================================================================== --- test/ELF/linkerscript/diagnostic.s +++ test/ELF/linkerscript/diagnostic.s @@ -70,7 +70,9 @@ # RUN: echo "\"" >> %t.script # RUN: not ld.lld -shared %t -o %t1 --script %t.script 2>&1 | \ # RUN: FileCheck -check-prefix=ERR8 -strict-whitespace %s -# ERR8: {{.*}}.script:2: unclosed quote +# ERR8: {{.*}}.script:2: unclosed quote +# ERR8-NEXT: {{.*}}.script:2: " +# ERR8-NEXT: {{.*}}.script:2: ^ ## Check tokenize() error in included script file # RUN: echo "SECTIONS {}" > %t.script.inc @@ -78,7 +80,9 @@ # RUN: echo "INCLUDE \"%t.script.inc\"" > %t.script # RUN: not ld.lld -shared %t -o %t1 --script %t.script 2>&1 | \ # RUN: FileCheck -check-prefix=ERR9 -strict-whitespace %s -# ERR9: {{.*}}.script.inc:2: unclosed quote +# ERR9: {{.*}}.script.inc:2: unclosed quote +# ERR9-NEXT: {{.*}}.script.inc:2: " +# ERR9-NEXT: {{.*}}.script.inc:2: ^ ## Check error reporting correctness for included files. # RUN: echo "SECTIONS {" > %t.script.inc Index: test/ELF/linkerscript/locationcounter.s =================================================================== --- test/ELF/linkerscript/locationcounter.s +++ test/ELF/linkerscript/locationcounter.s @@ -46,7 +46,10 @@ # RUN: .shiftl : { *(.shiftl) } \ # RUN: . = 0x30000 + (1 + 1023 >> 2); \ # RUN: .shiftr : { *(.shiftr) } \ - +# RUN: . = 0x31000+1*(2-1)+4*0x1000/2; \ +# RUN: .tokenizer1 : { *(.tokenizer1) } \ +# RUN: . = ABSOLUTE(0x32000+1*(2-1)+4*0x1000/2); \ +# RUN: .tokenizer2 : { *(.tokenizer2) } \ # RUN: }" > %t.script # RUN: ld.lld %t --script %t.script -o %t2 # RUN: llvm-objdump -section-headers %t2 | FileCheck %s @@ -72,6 +75,8 @@ # CHECK: .unary {{.*}} 000000000002a000 # CHECK: .shiftl {{.*}} 0000000000030040 # CHECK: .shiftr {{.*}} 0000000000030100 +# CHECK: .tokenizer1 {{.*}} 0000000000033001 +# CHECK: .tokenizer2 {{.*}} 0000000000034001 ## Mailformed number error. # RUN: echo "SECTIONS { \ @@ -187,3 +192,9 @@ .section .shiftr, "a" .quad 0 + +.section .tokenizer1, "a" +.quad 0 + +.section .tokenizer2, "a" +.quad 0 Index: test/ELF/version-script-err.s =================================================================== --- test/ELF/version-script-err.s +++ test/ELF/version-script-err.s @@ -6,6 +6,7 @@ // RUN: echo "\"" > %terr1.script // RUN: not ld.lld --version-script %terr1.script -shared %t.o -o %t.so 2>&1 | \ -// RUN: FileCheck -check-prefix=ERR1 %s -// ERR1: {{.*}}:1: unclosed quote -// ERR1-NEXT: {{.*}}: unexpected EOF +// RUN: FileCheck -strict-whitespace -check-prefix=ERR1 %s +// ERR1: {{.*}}:1: unclosed quote +// ERR1-NEXT: {{.*}}:1: " +// ERR1-NEXT: {{.*}}:1: ^