Index: lld/trunk/ELF/LinkerScript.h =================================================================== --- lld/trunk/ELF/LinkerScript.h +++ lld/trunk/ELF/LinkerScript.h @@ -114,28 +114,20 @@ // It can optionally have negative match pattern for EXCLUDED_FILE command. // Also it may be surrounded with SORT() command, so contains sorting rules. struct SectionPattern { - SectionPattern(StringMatcher &&Re1, StringMatcher &&Re2) - : ExcludedFileRe(std::forward(Re1)), - SectionRe(std::forward(Re2)) {} - - SectionPattern(SectionPattern &&Other) { - std::swap(ExcludedFileRe, Other.ExcludedFileRe); - std::swap(SectionRe, Other.SectionRe); - std::swap(SortOuter, Other.SortOuter); - std::swap(SortInner, Other.SortInner); - } + SectionPattern(StringMatcher &&Pat1, StringMatcher &&Pat2) + : ExcludedFilePat(Pat1), SectionPat(Pat2) {} - StringMatcher ExcludedFileRe; - StringMatcher SectionRe; + StringMatcher ExcludedFilePat; + StringMatcher SectionPat; SortSectionPolicy SortOuter; SortSectionPolicy SortInner; }; struct InputSectionDescription : BaseCommand { InputSectionDescription(StringRef FilePattern) - : BaseCommand(InputSectionKind), FileRe(FilePattern) {} + : BaseCommand(InputSectionKind), FilePat({FilePattern}) {} static bool classof(const BaseCommand *C); - StringMatcher FileRe; + StringMatcher FilePat; // Input sections that matches at least one of SectionPatterns // will be associated with this InputSectionDescription. Index: lld/trunk/ELF/LinkerScript.cpp =================================================================== --- lld/trunk/ELF/LinkerScript.cpp +++ lld/trunk/ELF/LinkerScript.cpp @@ -111,11 +111,11 @@ bool LinkerScript::shouldKeep(InputSectionBase *S) { for (InputSectionDescription *ID : Opt.KeptSections) { StringRef Filename = S->getFile()->getName(); - if (!ID->FileRe.match(sys::path::filename(Filename))) + if (!ID->FilePat.match(sys::path::filename(Filename))) continue; for (SectionPattern &P : ID->SectionPatterns) - if (P.SectionRe.match(S->Name)) + if (P.SectionPat.match(S->Name)) return true; } return false; @@ -178,13 +178,13 @@ size_t SizeBefore = I->Sections.size(); for (ObjectFile *F : Symtab::X->getObjectFiles()) { StringRef Filename = sys::path::filename(F->getName()); - if (!I->FileRe.match(Filename) || Pat.ExcludedFileRe.match(Filename)) + if (!I->FilePat.match(Filename) || Pat.ExcludedFilePat.match(Filename)) continue; for (InputSectionBase *S : F->getSections()) - if (!isDiscarded(S) && !S->OutSec && Pat.SectionRe.match(S->Name)) + if (!isDiscarded(S) && !S->OutSec && Pat.SectionPat.match(S->Name)) I->Sections.push_back(S); - if (Pat.SectionRe.match("COMMON")) + if (Pat.SectionPat.match("COMMON")) I->Sections.push_back(InputSection::CommonInputSection); } @@ -1211,7 +1211,7 @@ std::vector V; while (!Error && !consume(")")) V.push_back(next()); - return StringMatcher(std::move(V)); + return StringMatcher(V); } SortSectionPolicy ScriptParser::readSortKind() { @@ -1236,10 +1236,10 @@ std::vector ScriptParser::readInputSectionsList() { std::vector Ret; while (!Error && peek() != ")") { - StringMatcher ExcludeFileRe; + StringMatcher ExcludeFilePat; if (consume("EXCLUDE_FILE")) { expect("("); - ExcludeFileRe = readFilePatterns(); + ExcludeFilePat = readFilePatterns(); } std::vector V; @@ -1247,7 +1247,7 @@ V.push_back(next()); if (!V.empty()) - Ret.push_back({std::move(ExcludeFileRe), StringMatcher(std::move(V))}); + Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)}); else setError("section pattern is expected"); } Index: lld/trunk/ELF/Strings.h =================================================================== --- lld/trunk/ELF/Strings.h +++ lld/trunk/ELF/Strings.h @@ -12,29 +12,54 @@ #include "lld/Core/LLVM.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Support/Regex.h" #include namespace lld { namespace elf { -llvm::Regex compileGlobPatterns(ArrayRef V); + int getPriority(StringRef S); bool hasWildcard(StringRef S); std::vector parseHex(StringRef S); bool isValidCIdentifier(StringRef S); StringRef unquote(StringRef S); +// This class represents a glob pattern. Supported metacharacters +// are "*", "?", "[]" and "[^]". +class GlobPattern { +public: + explicit GlobPattern(StringRef Pat); + bool match(StringRef S) const; + +private: + bool matchOne(ArrayRef Pat, StringRef S) const; + llvm::BitVector scan(StringRef &S); + llvm::BitVector expand(StringRef S); + + // Parsed glob pattern. + std::vector Tokens; + + // A glob pattern given to this class. This is for error reporting. + StringRef Original; + + // The following members are for optimization. + llvm::Optional Exact; + llvm::Optional Prefix; + llvm::Optional Suffix; +}; + +// This class represents multiple glob patterns. class StringMatcher { public: StringMatcher() = default; - explicit StringMatcher(StringRef P) : Patterns({P}) {} - explicit StringMatcher(std::vector &&Pat) - : Patterns(std::move(Pat)) {} + explicit StringMatcher(const std::vector &Pat); + + bool match(StringRef S) const; - bool match(StringRef S); private: - std::vector Patterns; + std::vector Patterns; }; // Returns a demangled C++ symbol name. If Name is not a mangled Index: lld/trunk/ELF/Strings.cpp =================================================================== --- lld/trunk/ELF/Strings.cpp +++ lld/trunk/ELF/Strings.cpp @@ -21,36 +21,142 @@ using namespace lld; using namespace lld::elf; -// Returns true if S matches T. S can contain glob meta-characters. -// The asterisk ('*') matches zero or more characters, and the question -// mark ('?') matches one character. -static bool globMatch(StringRef S, StringRef T) { +// This is a scanner for the glob pattern. +// A glob pattern token is one of "*", "?", "[]", "[^]" +// (which is a negative form of "[]"), or a non-meta character. +// This function returns the first token in S. +BitVector GlobPattern::scan(StringRef &S) { + switch (S[0]) { + case '*': + S = S.substr(1); + // '*' is represented by an empty bitvector. + // All other bitvectors are 256-bit long. + return BitVector(); + case '?': + S = S.substr(1); + return BitVector(256, true); + case '[': { + size_t End = S.find(']', 1); + if (End == StringRef::npos) { + error("invalid glob pattern: " + Original); + return BitVector(256, false); + } + StringRef Chars = S.substr(1, End - 1); + S = S.substr(End + 1); + if (Chars.startswith("^")) + return expand(Chars.substr(1)).flip(); + return expand(Chars); + } + default: + BitVector BV(256, false); + BV[S[0]] = true; + S = S.substr(1); + return BV; + } +} + +// Expands character ranges and returns a bitmap. +// For example, "a-cf-hz" is expanded to "abcfghz". +BitVector GlobPattern::expand(StringRef S) { + BitVector BV(256, false); + + // Expand "x-y". for (;;) { - if (S.empty()) - return T.empty(); - if (S[0] == '*') { + if (S.size() < 3) + break; + + // If it doesn't start with something like "x-y", + // consume the first character and proceed. + if (S[1] != '-') { + BV[S[0]] = true; S = S.substr(1); - if (S.empty()) + continue; + } + + // It must be in the form of "x-y". + // Validate it and then interpret the range. + if (S[0] > S[2]) { + error("invalid glob pattern: " + Original); + return BV; + } + for (int C = S[0]; C <= S[2]; ++C) + BV[C] = true; + S = S.substr(3); + } + + for (char C : S) + BV[C] = true; + return BV; +} + +GlobPattern::GlobPattern(StringRef S) : Original(S) { + if (!hasWildcard(S)) { + // S doesn't contain any metacharacter, + // so the regular string comparison should work. + Exact = S; + } else if (S.endswith("*") && !hasWildcard(S.drop_back())) { + // S is something like "foo*". We can use startswith(). + Prefix = S.drop_back(); + } else if (S.startswith("*") && !hasWildcard(S.drop_front())) { + // S is something like "*foo". We can use endswith(). + Suffix = S.drop_front(); + } else { + // Otherwise, we need to do real glob pattern matching. + // Parse the pattern now. + while (!S.empty()) + Tokens.push_back(scan(S)); + } +} + +bool GlobPattern::match(StringRef S) const { + if (Exact) + return S == *Exact; + if (Prefix) + return S.startswith(*Prefix); + if (Suffix) + return S.endswith(*Suffix); + return matchOne(Tokens, S); +} + +// Runs glob pattern Pats against string S. +bool GlobPattern::matchOne(ArrayRef Pats, StringRef S) const { + for (;;) { + if (Pats.empty()) + return S.empty(); + + // If Pats[0] is '*', try to match Pats[1..] against all possible + // substrings of S to see at least one pattern succeeds. + if (Pats[0].size() == 0) { + Pats = Pats.slice(1); + if (Pats.empty()) // Fast path. If a pattern is '*', it matches anything. return true; - for (size_t I = 0, E = T.size(); I < E; ++I) - if (globMatch(S, T.substr(I))) + for (size_t I = 0, E = S.size(); I < E; ++I) + if (matchOne(Pats, S.substr(I))) return true; return false; } - if (T.empty() || (S[0] != T[0] && S[0] != '?')) + + // If Pats[0] is not '*', it must consume one character. + if (S.empty() || !Pats[0][S[0]]) return false; + Pats = Pats.slice(1); S = S.substr(1); - T = T.substr(1); } } -bool StringMatcher::match(StringRef S) { - for (StringRef P : Patterns) - if (globMatch(P, S)) +StringMatcher::StringMatcher(const std::vector &Pat) { + for (StringRef S : Pat) + Patterns.push_back(GlobPattern(S)); +} + +bool StringMatcher::match(StringRef S) const { + for (const GlobPattern &Pat : Patterns) + if (Pat.match(S)) return true; return false; } + // If an input string is in the form of "foo.N" where N is a number, // return N. Otherwise, returns 65536, which is one greater than the // lowest priority. @@ -74,42 +180,6 @@ return S.substr(1, S.size() - 2); } -// Converts a glob pattern to a regular expression. -static std::string toRegex(StringRef S) { - std::string T; - bool InBracket = false; - while (!S.empty()) { - char C = S.front(); - if (InBracket) { - InBracket = C != ']'; - T += C; - S = S.drop_front(); - continue; - } - - if (C == '*') - T += ".*"; - else if (C == '?') - T += '.'; - else if (StringRef(".+^${}()|/\\").find_first_of(C) != StringRef::npos) - T += std::string("\\") + C; - else - T += C; - - InBracket = C == '['; - S = S.substr(1); - } - return T; -} - -// Converts multiple glob patterns to a regular expression. -Regex elf::compileGlobPatterns(ArrayRef V) { - std::string T = "^(" + toRegex(V[0]); - for (StringRef S : V.slice(1)) - T += "|" + toRegex(S); - return Regex(T + ")$"); -} - // Converts a hex string (e.g. "deadbeef") to a vector. std::vector elf::parseHex(StringRef S) { std::vector Hex; Index: lld/trunk/ELF/SymbolTable.h =================================================================== --- lld/trunk/ELF/SymbolTable.h +++ lld/trunk/ELF/SymbolTable.h @@ -12,9 +12,9 @@ #include "InputFiles.h" #include "LTO.h" +#include "Strings.h" #include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/Support/Regex.h" namespace lld { namespace elf { @@ -92,7 +92,7 @@ void wrap(StringRef Name); private: - std::vector findAll(const llvm::Regex &Re); + std::vector findAll(const StringMatcher &M); std::pair insert(StringRef &Name); std::pair insert(StringRef &Name, uint8_t Type, uint8_t Visibility, bool CanOmitFromDynSym, Index: lld/trunk/ELF/SymbolTable.cpp =================================================================== --- lld/trunk/ELF/SymbolTable.cpp +++ lld/trunk/ELF/SymbolTable.cpp @@ -470,12 +470,12 @@ // Returns a list of defined symbols that match with a given regex. template -std::vector SymbolTable::findAll(const Regex &Re) { +std::vector SymbolTable::findAll(const StringMatcher &M) { std::vector Res; for (Symbol *Sym : SymVector) { SymbolBody *B = Sym->body(); StringRef Name = B->getName(); - if (!B->isUndefined() && const_cast(Re).match(Name)) + if (!B->isUndefined() && M.match(Name)) Res.push_back(B); } return Res; @@ -611,10 +611,10 @@ static std::vector findAllDemangled(const std::map> &D, - const Regex &Re) { + StringMatcher &M) { std::vector Res; for (auto &P : D) { - if (const_cast(Re).match(P.first)) + if (M.match(P.first)) for (SymbolBody *Body : P.second) if (!Body->isUndefined()) Res.push_back(Body); @@ -639,8 +639,8 @@ } if (Patterns.empty()) return; - Regex Re = compileGlobPatterns(Patterns); - std::vector Syms = findAll(Re); + StringMatcher M(Patterns); + std::vector Syms = findAll(M); for (SymbolBody *B : Syms) B->symbol()->VersionId = VER_NDX_GLOBAL; } @@ -696,9 +696,9 @@ for (SymbolVersion &Sym : V.Globals) { if (!Sym.HasWildcards) continue; - Regex Re = compileGlobPatterns({Sym.Name}); + StringMatcher M({Sym.Name}); std::vector Syms = - Sym.IsExternCpp ? findAllDemangled(Demangled, Re) : findAll(Re); + Sym.IsExternCpp ? findAllDemangled(Demangled, M) : findAll(M); // Exact matching takes precendence over fuzzy matching, // so we set a version to a symbol only if no version has been assigned