Index: lld/ELF/LinkerScript.cpp
===================================================================
--- lld/ELF/LinkerScript.cpp
+++ lld/ELF/LinkerScript.cpp
@@ -1610,9 +1610,7 @@
   Expr E;
   assert(Op == "=" || Op == "+=");
   if (consume("ABSOLUTE")) {
-    // The RHS may be something like "ABSOLUTE(.) & 0xff".
-    // Call readExpr1 to read the whole expression.
-    E = readExpr1(readParenExpr(), 0);
+    E = readExpr();
     E.IsAbsolute = [] { return true; };
   } else {
     E = readExpr();
@@ -1628,7 +1626,15 @@
 
 // This is an operator-precedence parser to parse a linker
 // script expression.
-Expr ScriptParser::readExpr() { return readExpr1(readPrimary(), 0); }
+Expr ScriptParser::readExpr() {
+  // Our lexer is context-aware. Set the in-expression bit so that
+  // it applies different tokenization rules.
+  bool Orig = InExpr;
+  InExpr = true;
+  Expr E = readExpr1(readPrimary(), 0);
+  InExpr = Orig;
+  return E;
+}
 
 static Expr combine(StringRef Op, Expr L, Expr R) {
   auto IsAbs = [=] { return L.IsAbsolute() && R.IsAbsolute(); };
Index: lld/ELF/ScriptLexer.h
===================================================================
--- lld/ELF/ScriptLexer.h
+++ lld/ELF/ScriptLexer.h
@@ -36,10 +36,12 @@
 
   std::vector<MemoryBufferRef> MBs;
   std::vector<StringRef> Tokens;
+  bool InExpr = false;
   size_t Pos = 0;
   bool Error = false;
 
 private:
+  void maybeSplitExpr();
   StringRef getLine();
   size_t getLineNumber();
   size_t getColumnNumber();
Index: lld/ELF/ScriptLexer.cpp
===================================================================
--- lld/ELF/ScriptLexer.cpp
+++ lld/ELF/ScriptLexer.cpp
@@ -26,18 +26,9 @@
 // lookahead is labels in version scripts, where we need to parse "local :"
 // as if "local:".
 //
-// Overall, this lexer works fine for most linker scripts. There's room
-// for improving compatibility, but that's probably not at the top of our
-// todo list.
-//
-// A caveat: This lexer splits an input string into tokens ahead of time,
-// so the lexer is not context aware. There's one known corner case. Let's
-// say the next string is "val*3" (without quotes). In the context where
-// the parser is expecting an expression, that should be tokenizes to
-// "val", "*" and "3". In other context, it should be just a single
-// token. (If it is in a filename context, it'll be interpeted as a glob
-// pattern, for example.) We want to fix this, but it probably needs a
-// redesign of this lexer.
+// Overall, this lexer works fine for most linker scripts. There might
+// be room for improving compatibility, but that's probably not at the
+// top of our todo list.
 //
 //===----------------------------------------------------------------------===//
 
@@ -175,7 +166,59 @@
 // An erroneous token is handled as if it were the last token before EOF.
 bool ScriptLexer::atEOF() { return Error || Tokens.size() == Pos; }
 
+// Split a given string as an expression.
+// This function returns "3", "*" and "5" for "3*5" for example.
+static std::vector<StringRef> tokenizeExpr(StringRef S) {
+  StringRef Ops = "+-*/"; // List of operators
+
+  // Quoted strings are literal strings, so we don't want to split it.
+  if (S.startswith("\""))
+    return {S};
+
+  // Split S with +-*/ as separators.
+  std::vector<StringRef> Ret;
+  while (!S.empty()) {
+    size_t E = S.find_first_of(Ops);
+    if (E == StringRef::npos) {
+      Ret.push_back(S);
+      break;
+    }
+
+    if (E != 0) {
+      Ret.push_back(S.substr(0, E));
+      S = S.substr(E);
+      continue;
+    }
+
+    Ret.push_back(S.substr(0, 1));
+    S = S.substr(1);
+  }
+  return Ret;
+}
+
+// In contexts where expressions are expected, the lexer should apply
+// different tokenization rules than the default one. By default,
+// arithmetic operator characters are regular characters, but in the
+// expression context, they should be independent tokens.
+//
+// For example, "foo*3" should be tokenized to "foo", "*" and "3" only
+// in the expression context.
+//
+// This function may split the current token into multiple tokens.
+void ScriptLexer::maybeSplitExpr() {
+  if (!InExpr || Error || atEOF())
+    return;
+
+  std::vector<StringRef> V = tokenizeExpr(Tokens[Pos]);
+  if (V.size() == 1)
+    return;
+  Tokens.erase(Tokens.begin() + Pos);
+  Tokens.insert(Tokens.begin() + Pos, V.begin(), V.end());
+}
+
 StringRef ScriptLexer::next() {
+  maybeSplitExpr();
+
   if (Error)
     return "";
   if (atEOF()) {
@@ -186,6 +229,8 @@
 }
 
 StringRef ScriptLexer::peek(unsigned N) {
+  maybeSplitExpr();
+
   StringRef Tok;
   for (unsigned I = 0; I <= N; ++I) {
     Tok = next();
Index: lld/test/ELF/linkerscript/operators.s
===================================================================
--- lld/test/ELF/linkerscript/operators.s
+++ lld/test/ELF/linkerscript/operators.s
@@ -10,7 +10,7 @@
 # RUN:  .div : { *(.div) } \
 # RUN:  . = 0x11000 + 0x1000 * 0x2; \
 # RUN:  .mul : { *(.mul) } \
-# RUN:  . = 0x10000 + (0x1000 + 0x1000) * 0x2; \
+# RUN:  . = 0x10000+(0x1000+0x1000)*0x2; \
 # RUN:  .bracket : { *(.bracket) } \
 # RUN:  . = 0x17000 & 0x15000; \
 # RUN:  .and : { *(.and) } \