Index: ELF/CMakeLists.txt =================================================================== --- ELF/CMakeLists.txt +++ ELF/CMakeLists.txt @@ -13,6 +13,7 @@ LinkerScript.cpp MarkLive.cpp OutputSections.cpp + ScriptParser.cpp SymbolTable.cpp Symbols.cpp Target.cpp Index: ELF/LinkerScript.cpp =================================================================== --- ELF/LinkerScript.cpp +++ ELF/LinkerScript.cpp @@ -17,6 +17,7 @@ #include "Config.h" #include "Driver.h" #include "InputSection.h" +#include "ScriptParser.h" #include "SymbolTable.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" @@ -101,25 +102,16 @@ return matchStr(SectionPattern, S->getSectionName()); } -class elf::ScriptParser { +class elf::ScriptParser final : public elf::ScriptParserBase { typedef void (ScriptParser::*Handler)(); public: ScriptParser(BumpPtrAllocator *A, StringRef S, bool B) - : Saver(*A), Input(S), Tokens(tokenize(S)), IsUnderSysroot(B) {} + : ScriptParserBase(S), Saver(*A), IsUnderSysroot(B) {} - void run(); + void run() final; private: - void setError(const Twine &Msg); - static std::vector tokenize(StringRef S); - static StringRef skipSpace(StringRef S); - bool atEOF(); - StringRef next(); - StringRef peek(); - bool skip(StringRef Tok); - void expect(StringRef Expect); - void addFile(StringRef Path); void readAsNeeded(); @@ -137,17 +129,9 @@ void readOutputSectionDescription(); void readSectionPatterns(StringRef OutSec, bool Keep); - size_t getPos(); - void printErrorPos(); - std::vector parseHex(StringRef S); - StringSaver Saver; - StringRef Input; - std::vector Tokens; const static StringMap Cmd; - size_t Pos = 0; bool IsUnderSysroot; - bool Error = false; }; const StringMap elf::ScriptParser::Cmd = { @@ -173,128 +157,6 @@ } } -// Returns the line that the character S[Pos] is in. -static StringRef getLine(StringRef S, size_t Pos) { - size_t Begin = S.rfind('\n', Pos); - size_t End = S.find('\n', Pos); - Begin = (Begin == StringRef::npos) ? 
0 : Begin + 1; - if (End == StringRef::npos) - End = S.size(); - // rtrim for DOS-style newlines. - return S.substr(Begin, End - Begin).rtrim(); -} - -void ScriptParser::printErrorPos() { - StringRef Tok = Tokens[Pos == 0 ? 0 : Pos - 1]; - StringRef Line = getLine(Input, Tok.data() - Input.data()); - size_t Col = Tok.data() - Line.data(); - error(Line); - error(std::string(Col, ' ') + "^"); -} - -// We don't want to record cascading errors. Keep only the first one. -void ScriptParser::setError(const Twine &Msg) { - if (Error) - return; - error("line " + Twine(getPos()) + ": " + Msg); - printErrorPos(); - Error = true; -} - -// Split S into linker script tokens. -std::vector ScriptParser::tokenize(StringRef S) { - std::vector Ret; - for (;;) { - S = skipSpace(S); - if (S.empty()) - return Ret; - - // Quoted token - if (S.startswith("\"")) { - size_t E = S.find("\"", 1); - if (E == StringRef::npos) { - error("unclosed quote"); - return {}; - } - Ret.push_back(S.substr(1, E - 1)); - S = S.substr(E + 1); - continue; - } - - // Unquoted token - size_t Pos = S.find_first_not_of( - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" - "0123456789_.$/\\~=+[]*?-:"); - // A character that cannot start a word (which is usually a - // punctuation) forms a single character token. - if (Pos == 0) - Pos = 1; - Ret.push_back(S.substr(0, Pos)); - S = S.substr(Pos); - } -} - -// Skip leading whitespace characters or /**/-style comments. -StringRef ScriptParser::skipSpace(StringRef S) { - for (;;) { - if (S.startswith("/*")) { - size_t E = S.find("*/", 2); - if (E == StringRef::npos) { - error("unclosed comment in a linker script"); - return ""; - } - S = S.substr(E + 2); - continue; - } - size_t Size = S.size(); - S = S.ltrim(); - if (S.size() == Size) - return S; - } -} - -// An errneous token is handled as if it were the last token before EOF. 
-bool ScriptParser::atEOF() { return Error || Tokens.size() == Pos; } - -StringRef ScriptParser::next() { - if (Error) - return ""; - if (atEOF()) { - setError("unexpected EOF"); - return ""; - } - return Tokens[Pos++]; -} - -StringRef ScriptParser::peek() { - StringRef Tok = next(); - if (Error) - return ""; - --Pos; - return Tok; -} - -bool ScriptParser::skip(StringRef Tok) { - if (Error) - return false; - if (atEOF()) { - setError("unexpected EOF"); - return false; - } - if (Tok != Tokens[Pos]) - return false; - ++Pos; - return true; -} - -void ScriptParser::expect(StringRef Expect) { - if (Error) - return; - StringRef Tok = next(); - if (Tok != Expect) - setError(Expect + " expected, but got " + Tok); -} - void ScriptParser::addFile(StringRef S) { if (IsUnderSysroot && S.startswith("/")) { SmallString<128> Path; @@ -435,30 +297,6 @@ Script->Sections.emplace_back(OutSec, next(), Keep); } -// Returns the current line number. -size_t ScriptParser::getPos() { - if (Pos == 0) - return 1; - const char *Begin = Input.data(); - const char *Tok = Tokens[Pos - 1].data(); - return StringRef(Begin, Tok - Begin).count('\n') + 1; -} - -std::vector ScriptParser::parseHex(StringRef S) { - std::vector Hex; - while (!S.empty()) { - StringRef B = S.substr(0, 2); - S = S.substr(2); - uint8_t H; - if (B.getAsInteger(16, H)) { - setError("not a hexadecimal value: " + B); - return {}; - } - Hex.push_back(H); - } - return Hex; -} - void ScriptParser::readOutputSectionDescription() { StringRef OutSec = next(); Script->SectionOrder.push_back(OutSec); Index: ELF/ScriptParser.h =================================================================== --- /dev/null +++ ELF/ScriptParser.h @@ -0,0 +1,50 @@ +//===- ScriptParser.h -------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_ELF_SCRIPT_PARSER_H
+#define LLD_ELF_SCRIPT_PARSER_H
+
+#include "lld/Core/LLVM.h"
+#include "llvm/ADT/StringRef.h"
+
+namespace lld {
+namespace elf {
+
+class ScriptParserBase {
+public:
+  explicit ScriptParserBase(StringRef S) : Input(S), Tokens(tokenize(S)) {}
+  virtual ~ScriptParserBase() = default;
+
+  virtual void run() = 0;
+
+protected:
+  void setError(const Twine &Msg);
+  static std::vector<StringRef> tokenize(StringRef S);
+  static StringRef skipSpace(StringRef S);
+  bool atEOF();
+  StringRef next();
+  StringRef peek();
+  bool skip(StringRef Tok);
+  void expect(StringRef Expect);
+
+  size_t getPos();
+  void printErrorPos();
+
+  std::vector<uint8_t> parseHex(StringRef S);
+
+  StringRef Input;
+  std::vector<StringRef> Tokens;
+  size_t Pos = 0;
+  bool Error = false;
+};
+
+} // namespace elf
+} // namespace lld
+
+#endif
Index: ELF/ScriptParser.cpp
===================================================================
--- /dev/null
+++ ELF/ScriptParser.cpp
@@ -0,0 +1,182 @@
+//===- ScriptParser.cpp ---------------------------------------------------===//
+//
+//                             The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the base parser class for linker script and dynamic
+// list.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ScriptParser.h"
+#include "Error.h"
+
+using namespace llvm;
+using namespace lld;
+using namespace lld::elf;
+
+// Returns the line that the character S[Pos] is in.
+static StringRef getLine(StringRef S, size_t Pos) {
+  size_t Begin = S.rfind('\n', Pos);
+  size_t End = S.find('\n', Pos);
+  Begin = (Begin == StringRef::npos) ? 0 : Begin + 1;
+  if (End == StringRef::npos)
+    End = S.size();
+  // rtrim for DOS-style newlines.
+  return S.substr(Begin, End - Begin).rtrim();
+}
+
+// Prints the source line containing the most recently consumed token (or
+// the first token if none has been consumed yet) followed by a caret line
+// pointing at the column where that token starts.
+void ScriptParserBase::printErrorPos() {
+  // With no tokens at all (e.g. empty input or a failed tokenize()) there is
+  // no position to point at; indexing Tokens here would be out of bounds.
+  if (Tokens.empty())
+    return;
+  StringRef Tok = Tokens[Pos == 0 ? 0 : Pos - 1];
+  StringRef Line = getLine(Input, Tok.data() - Input.data());
+  size_t Col = Tok.data() - Line.data();
+  error(Line);
+  error(std::string(Col, ' ') + "^");
+}
+
+// We don't want to record cascading errors. Keep only the first one.
+void ScriptParserBase::setError(const Twine &Msg) {
+  if (Error)
+    return;
+  error("line " + Twine(getPos()) + ": " + Msg);
+  printErrorPos();
+  Error = true;
+}
+
+// Split S into linker script tokens.
+std::vector<StringRef> ScriptParserBase::tokenize(StringRef S) {
+  std::vector<StringRef> Ret;
+  for (;;) {
+    S = skipSpace(S);
+    if (S.empty())
+      return Ret;
+
+    // Quoted token
+    if (S.startswith("\"")) {
+      size_t E = S.find("\"", 1);
+      if (E == StringRef::npos) {
+        error("unclosed quote");
+        return {};
+      }
+      Ret.push_back(S.substr(1, E - 1));
+      S = S.substr(E + 1);
+      continue;
+    }
+
+    // Unquoted token
+    size_t Pos = S.find_first_not_of(
+        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+        "0123456789_.$/\\~=+[]*?-:");
+    // A character that cannot start a word (which is usually a
+    // punctuation) forms a single character token.
+    if (Pos == 0)
+      Pos = 1;
+    Ret.push_back(S.substr(0, Pos));
+    S = S.substr(Pos);
+  }
+}
+
+// Skip leading whitespace characters or /**/-style comments.
+StringRef ScriptParserBase::skipSpace(StringRef S) {
+  for (;;) {
+    if (S.startswith("/*")) {
+      size_t E = S.find("*/", 2);
+      if (E == StringRef::npos) {
+        error("unclosed comment in a linker script");
+        return "";
+      }
+      S = S.substr(E + 2);
+      continue;
+    }
+    size_t Size = S.size();
+    S = S.ltrim();
+    if (S.size() == Size)
+      return S;
+  }
+}
+
+// An erroneous token is handled as if it were the last token before EOF.
+bool ScriptParserBase::atEOF() { return Error || Tokens.size() == Pos; }
+
+// Returns the current token and advances past it. Reports "unexpected EOF"
+// and returns an empty string when no token is left.
+StringRef ScriptParserBase::next() {
+  if (Error)
+    return "";
+  if (atEOF()) {
+    setError("unexpected EOF");
+    return "";
+  }
+  return Tokens[Pos++];
+}
+
+// Returns the current token without consuming it. Because this goes through
+// next(), peeking at EOF reports "unexpected EOF".
+StringRef ScriptParserBase::peek() {
+  StringRef Tok = next();
+  if (Error)
+    return "";
+  --Pos;
+  return Tok;
+}
+
+// Consumes the current token and returns true if it equals Tok; otherwise
+// leaves the position unchanged and returns false. Reports an error at EOF.
+bool ScriptParserBase::skip(StringRef Tok) {
+  if (Error)
+    return false;
+  if (atEOF()) {
+    setError("unexpected EOF");
+    return false;
+  }
+  if (Tokens[Pos] != Tok)
+    return false;
+  ++Pos;
+  return true;
+}
+
+// Consumes the current token and reports an error unless it equals Expect.
+void ScriptParserBase::expect(StringRef Expect) {
+  if (Error)
+    return;
+  StringRef Tok = next();
+  if (Tok != Expect)
+    setError(Expect + " expected, but got " + Tok);
+}
+
+// Returns the current line number.
+size_t ScriptParserBase::getPos() {
+  if (Pos == 0)
+    return 1;
+  const char *Begin = Input.data();
+  const char *Tok = Tokens[Pos - 1].data();
+  return StringRef(Begin, Tok - Begin).count('\n') + 1;
+}
+
+// Decodes S two characters at a time as hexadecimal bytes. Reports an
+// error and returns an empty vector if any chunk is not valid hexadecimal.
+std::vector<uint8_t> ScriptParserBase::parseHex(StringRef S) {
+  std::vector<uint8_t> Hex;
+  while (!S.empty()) {
+    StringRef B = S.substr(0, 2);
+    S = S.substr(2);
+    uint8_t H;
+    if (B.getAsInteger(16, H)) {
+      setError("not a hexadecimal value: " + B);
+      return {};
+    }
+    Hex.push_back(H);
+  }
+  return Hex;
+}