diff --git a/llvm/docs/CommandGuide/llvm-objcopy.rst b/llvm/docs/CommandGuide/llvm-objcopy.rst --- a/llvm/docs/CommandGuide/llvm-objcopy.rst +++ b/llvm/docs/CommandGuide/llvm-objcopy.rst @@ -129,6 +129,27 @@ Display the version of this program. +.. option:: --wildcard, -w + + Allow wildcard syntax for symbol-related flags. On by default for + section-related flags. Incompatible with --regex. + + Wildcard syntax allows the following special symbols: + + ====================== ========================= ================== + Character Meaning Equivalent + ====================== ========================= ================== + ``*`` Any number of characters ``.*`` + ``?`` Any single character ``.`` + ``\`` Escape the next character ``\`` + ``[a-z]`` Character class ``[a-z]`` + ``[!a-z]``, ``[^a-z]`` Negated character class ``[^a-z]`` + ====================== ========================= ================== + + Additionally, starting a wildcard with '!' will prevent a match, even if + another flag matches. For example ``-w -N '*' -N '!x'`` will strip all symbols + except for ``x``. + COFF-SPECIFIC OPTIONS --------------------- diff --git a/llvm/docs/CommandGuide/llvm-strip.rst b/llvm/docs/CommandGuide/llvm-strip.rst --- a/llvm/docs/CommandGuide/llvm-strip.rst +++ b/llvm/docs/CommandGuide/llvm-strip.rst @@ -103,6 +103,27 @@ Display the version of this program. +.. option:: --wildcard, -w + + Allow wildcard syntax for symbol-related flags. On by default for + section-related flags. Incompatible with --regex. 
+
+ Wildcard syntax allows the following special symbols:
+
+ ====================== ========================= ==================
+ Character              Meaning                   Equivalent
+ ====================== ========================= ==================
+ ``*``                  Any number of characters  ``.*``
+ ``?``                  Any single character      ``.``
+ ``\``                  Escape the next character ``\``
+ ``[a-z]``              Character class           ``[a-z]``
+ ``[!a-z]``, ``[^a-z]`` Negated character class   ``[^a-z]``
+ ====================== ========================= ==================
+
+ Additionally, starting a wildcard with '!' will prevent a match, even if
+ another flag matches. For example ``-w -N '*' -N '!x'`` will strip all symbols
+ except for ``x``.
+
 COFF-SPECIFIC OPTIONS
 ---------------------
 
diff --git a/llvm/include/llvm/Support/Regex.h b/llvm/include/llvm/Support/Regex.h
--- a/llvm/include/llvm/Support/Regex.h
+++ b/llvm/include/llvm/Support/Regex.h
@@ -96,6 +96,11 @@
     /// Turn String into a regex by escaping its special characters.
     static std::string escape(StringRef String);
 
+    /// Turn a shell wildcard string into a regular expression by escaping
+    /// non-wildcard special characters and converting wildcard notation to its
+    /// equivalent regular expression notation.
+    static std::string fromShellWildcard(StringRef Wildcard);
+
   private:
     struct llvm_regex *preg;
     int error;
diff --git a/llvm/lib/Support/Regex.cpp b/llvm/lib/Support/Regex.cpp
--- a/llvm/lib/Support/Regex.cpp
+++ b/llvm/lib/Support/Regex.cpp
@@ -207,3 +207,126 @@
 
   return RegexStr;
 }
+
+// These are the special characters matched in functions like "p_ere_exp".
+static const char CollatingMetachars[] = ":.=";
+
+// Copies the collating element at the front of Wildcard (for example
+// "[:alpha:]", "[.x.]" or "[=x=]") to RegexStr unchanged and returns the text
+// that follows it. If the closing ":]", ".]" or "=]" is missing, the whole
+// remainder is copied and an empty StringRef is returned.
+static StringRef consumeCollatingClass(StringRef Wildcard,
+                                       std::string *RegexStr) {
+  assert(Wildcard.size() >= 2 && "Wildcard too short for collating class");
+  assert(Wildcard.front() == '[' && "Collating class doesn't start with [");
+  assert(strchr(CollatingMetachars, Wildcard[1]) &&
+         "Collating class doesn't use any of :.=");
+
+  // The terminator repeats the character that followed the opening '['.
+  char CollatingEnd[] = {Wildcard[1], ']', 0};
+
+  size_t Pos = Wildcard.find(CollatingEnd);
+  if (Pos == StringRef::npos) {
+    *RegexStr += Wildcard;
+    return StringRef();
+  }
+
+  *RegexStr += Wildcard.take_front(Pos);
+  Wildcard = Wildcard.drop_front(Pos);
+
+  if (Wildcard.consume_front(CollatingEnd))
+    *RegexStr += CollatingEnd;
+  return Wildcard;
+}
+
+// Copies the bracket expression at the front of Wildcard to RegexStr. Its
+// contents are neither converted nor escaped, except that a leading '!' is
+// rewritten to '^'. Returns the text after the closing ']'; an unterminated
+// bracket expression consumes the rest of Wildcard.
+static StringRef consumeBracket(StringRef Wildcard, std::string *RegexStr) {
+  assert(Wildcard.front() == '[' && "Wildcard bracket doesn't start with [");
+
+  *RegexStr += "[";
+  Wildcard = Wildcard.drop_front();
+
+  // The first character can be ! instead of ^ to negate a character class.
+  if (!Wildcard.empty() &&
+      (Wildcard.front() == '!' || Wildcard.front() == '^')) {
+    Wildcard = Wildcard.drop_front();
+    *RegexStr += "^";
+  }
+
+  // Allow ']' to be part of a character class if it's the first item.
+  if (Wildcard.consume_front("]"))
+    *RegexStr += "]";
+
+  // Consume characters until we see a collating class or a ']'.
+  while (!Wildcard.empty() && Wildcard.front() != ']') {
+    // If the character was '[', check if it's a collating symbol
+    // collection (e.g. "[:alpha:]"). If so, consume until the end of it.
+    if (Wildcard.front() == '[' && Wildcard.size() >= 2 &&
+        strchr(CollatingMetachars, Wildcard[1])) {
+      Wildcard = consumeCollatingClass(Wildcard, RegexStr);
+    } else {
+      *RegexStr += Wildcard.front();
+      Wildcard = Wildcard.drop_front();
+    }
+  }
+
+  if (Wildcard.consume_front("]"))
+    *RegexStr += "]";
+
+  return Wildcard;
+}
+
+std::string Regex::fromShellWildcard(StringRef Wildcard) {
+  std::string RegexStr;
+
+  while (!Wildcard.empty()) {
+    switch (Wildcard.front()) {
+    case '*':
+      // * matches any number of characters.
+      Wildcard = Wildcard.drop_front();
+      RegexStr += ".*";
+      break;
+
+    case '?':
+      // ? matches any single character.
+      Wildcard = Wildcard.drop_front();
+      RegexStr += ".";
+      break;
+
+    case '[':
+      // [] are treated normally, but the first character can be ! instead of ^
+      // to negate a character class.
+      Wildcard = consumeBracket(Wildcard, &RegexStr);
+      break;
+    case ']':
+      // An unmatched ']' is copied through unescaped.
+      Wildcard = Wildcard.drop_front();
+      RegexStr += "]";
+      break;
+    case '\\':
+      // A backslash makes the next character literal. The pair is passed
+      // through as-is, except that the backslash is dropped before '1'-'9':
+      // the regex parser would treat "\1".."\9" as back-references instead of
+      // literal digits.
+      Wildcard = Wildcard.drop_front();
+      if (Wildcard.empty() || Wildcard.front() < '1' || Wildcard.front() > '9')
+        RegexStr += "\\";
+      if (!Wildcard.empty()) {
+        RegexStr += Wildcard.front();
+        Wildcard = Wildcard.drop_front();
+      }
+      break;
+    default:
+      // Anything else is literal; escape it if it is a regex metacharacter.
+      if (strchr(RegexMetachars, Wildcard.front()))
+        RegexStr += "\\";
+      RegexStr += Wildcard.front();
+      Wildcard = Wildcard.drop_front();
+    }
+  }
+
+  return RegexStr;
+}
diff --git a/llvm/test/tools/llvm-objcopy/ELF/wildcard-flags.test b/llvm/test/tools/llvm-objcopy/ELF/wildcard-flags.test
new file mode 100644
--- /dev/null
+++ b/llvm/test/tools/llvm-objcopy/ELF/wildcard-flags.test
@@ -0,0 +1,162 @@
+# RUN: yaml2obj %s > %t.o
+
+## This test checks basic functionality of shell wildcard matching, as well as
+## verifying all the relevant flags in llvm-objcopy and llvm-strip are
+## configured correctly.
+## For more detailed syntax tests, see wildcard-syntax.test.
+
+## Check that --regex and --wildcard cannot be used together.
+# RUN: not llvm-objcopy --regex --wildcard %t.o %t.err.o 2>&1 \ +# RUN: | FileCheck %s --check-prefix=ERR +# RUN: not llvm-strip --regex --wildcard %t.o -o %t.err.o 2>&1 \ +# RUN: | FileCheck %s --check-prefix=ERR + +# ERR: error: --regex and --wildcard are incompatible + +## Check that section removal flags default to wildcard matches. + +## --keep-section: +# RUN: llvm-objcopy --strip-all --keep-section='.f*' %t.o %t.ksec.1.o +# RUN: llvm-readobj --sections %t.ksec.1.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,SEC,FOO-SEC +# RUN: llvm-strip --strip-all --keep-section='.f*' %t.o -o %t.ksec.2.o +# RUN: cmp %t.ksec.1.o %t.ksec.2.o + +## --only-section: +# RUN: llvm-objcopy --strip-all --only-section='.f*' %t.o %t.osec.1.o +# RUN: llvm-readobj --sections %t.osec.1.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,SEC,FOO-SEC + +## --remove-section +# RUN: llvm-objcopy --strip-debug --remove-section='.s??tab' %t.o %t.rsec.1.o +# RUN: llvm-readobj --sections %t.rsec.1.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: \ +# RUN: --check-prefixes=CHECK,SEC,FOO-SEC,BAR-SEC +# RUN: llvm-strip --strip-debug --remove-section='.s??tab' %t.o -o %t.rsec.2.o +# RUN: cmp %t.rsec.1.o %t.rsec.2.o + +## Check that symbol removal options default to literal matches. Adding -w +## enables wildcard support for these options. 
+ +## --globalize-symbol: +# RUN: llvm-objcopy --globalize-symbol='*' %t.o %t.globsym.1.o +# RUN: llvm-readobj --symbols %t.globsym.1.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: \ +# RUN: --check-prefixes=CHECK,LOCAL,FOO-SYM,BAR-SYM + +# RUN: llvm-objcopy -w --globalize-symbol='*' %t.o %t.globsym.2.o +# RUN: llvm-readobj --symbols %t.globsym.2.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: \ +# RUN: --check-prefixes=CHECK,GLOBAL,FOO-SYM,BAR-SYM + +## --keep-symbol: +# RUN: llvm-objcopy --discard-all --keep-symbol='f*' %t.o %t.ksym.1.o +# RUN: llvm-readobj --symbols %t.ksym.1.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK +# RUN: llvm-strip --discard-all --keep-symbol='f*' %t.o -o %t.ksym.2.o +# RUN: cmp %t.ksym.1.o %t.ksym.2.o + +# RUN: llvm-objcopy --discard-all -w --keep-symbol='f*' %t.o %t.ksym.3.o +# RUN: llvm-readobj --symbols %t.ksym.3.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,FOO-SYM +# RUN: llvm-strip --discard-all -w --keep-symbol='f*' %t.o -o %t.ksym.4.o +# RUN: cmp %t.ksym.3.o %t.ksym.4.o + +## --localize-symbol: +## Note: Use %t.globsym.2.o instead of %t.o since those symbols are global. 
+# RUN: llvm-objcopy --localize-symbol='*' %t.globsym.2.o %t.localsym.1.o +# RUN: llvm-readobj --symbols %t.localsym.1.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: \ +# RUN: --check-prefixes=CHECK,GLOBAL,FOO-SYM,BAR-SYM + +# RUN: llvm-objcopy -w --localize-symbol='*' %t.globsym.2.o %t.localsym.2.o +# RUN: llvm-readobj --symbols %t.localsym.2.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: \ +# RUN: --check-prefixes=CHECK,LOCAL,FOO-SYM,BAR-SYM + +## --strip-symbol: +# RUN: llvm-objcopy --strip-symbol='f*' %t.o %t.stripsym.1.o +# RUN: llvm-readobj --symbols %t.stripsym.1.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,FOO-SYM,BAR-SYM +# RUN: llvm-strip --strip-symbol='f*' %t.o -o %t.stripsym.2.o +# RUN: cmp %t.stripsym.1.o %t.stripsym.2.o + +# RUN: llvm-objcopy -w --strip-symbol='f*' %t.o %t.stripsym.3.o +# RUN: llvm-readobj --symbols %t.stripsym.3.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,BAR-SYM +# RUN: llvm-strip -w --strip-symbol='f*' %t.o -o %t.stripsym.4.o +# RUN: cmp %t.stripsym.3.o %t.stripsym.4.o + +## --strip-unneeded-symbol: +# RUN: llvm-objcopy --strip-unneeded-symbol='f*' %t.o %t.stripunsym.1.o +# RUN: llvm-readobj --symbols %t.stripunsym.1.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,FOO-SYM,BAR-SYM + +# RUN: llvm-objcopy -w --strip-unneeded-symbol='f*' %t.o %t.stripunsym.2.o +# RUN: llvm-readobj --symbols %t.stripunsym.2.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,BAR-SYM + +## --weaken-symbol: +## Note: Use %t.globsym.2.o instead of %t.o since those symbols are global. 
+# RUN: llvm-objcopy --weaken-symbol='*' %t.globsym.2.o %t.weaksym.1.o +# RUN: llvm-readobj --symbols %t.weaksym.1.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: \ +# RUN: --check-prefixes=CHECK,GLOBAL,FOO-SYM,BAR-SYM + +# RUN: llvm-objcopy -w --weaken-symbol='*' %t.globsym.2.o %t.weaksym.2.o +# RUN: llvm-readobj --symbols %t.weaksym.2.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: \ +# RUN: --check-prefixes=CHECK,WEAK,FOO-SYM,BAR-SYM + +## --keep-global-symbol: +## Note: Use %t.globsym.2.o instead of %t.o since those symbols are global. +# RUN: llvm-objcopy --keep-global-symbol='*' %t.globsym.2.o %t.keepgsym.1.o +# RUN: llvm-readobj --symbols %t.keepgsym.1.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: \ +# RUN: --check-prefixes=CHECK,LOCAL,FOO-SYM,BAR-SYM + +# RUN: llvm-objcopy -w --keep-global-symbol='*' %t.globsym.2.o %t.keepgsym.2.o +# RUN: llvm-readobj --symbols %t.keepgsym.2.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: \ +# RUN: --check-prefixes=CHECK,GLOBAL,FOO-SYM,BAR-SYM + +## Check that -w is accepted as an alias for --wildcard. 
+# RUN: llvm-objcopy --wildcard --keep-global-symbol='*' %t.globsym.2.o %t.keepgsym.3.o +# RUN: cmp %t.keepgsym.2.o %t.keepgsym.3.o + +# CHECK: LoadName: +# CHECK: Name: (0) + +# FOO-SEC: Name: .foo + +# FOO-SYM: Name: foo +# GLOBAL: Binding: Global +# WEAK: Binding: Weak +# LOCAL: Binding: Local + +# BAR-SEC: Name: .bar +# BAR-SYM: Name: bar +# GLOBAL: Binding: Global +# WEAK: Binding: Weak +# LOCAL: Binding: Local + +# SEC: Name: .shstrtab + +!ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .foo + Type: SHT_PROGBITS + - Name: .bar + Type: SHT_PROGBITS +Symbols: + - Name: foo + Type: STT_FUNC + Section: .foo + - Name: bar + Type: STT_FUNC + Section: .foo diff --git a/llvm/test/tools/llvm-objcopy/ELF/wildcard-syntax.test b/llvm/test/tools/llvm-objcopy/ELF/wildcard-syntax.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-objcopy/ELF/wildcard-syntax.test @@ -0,0 +1,120 @@ +# RUN: yaml2obj --docnum=1 %s > %t.o + +## This test checks that llvm-objcopy accepts wildcard syntax correctly. + +## * matches all characters. +# RUN: llvm-objcopy --remove-section='.f*' %t.o %t.glob.o +# RUN: llvm-readobj --sections %t.glob.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,BAR + +## Wildcard matches are full matches. ("*a" does not match ".bar") +# RUN: llvm-objcopy --remove-section='*a' %t.o %t.full.o +# RUN: llvm-readobj --sections %t.full.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,FOO,BAR + +## ? matches one character. +# RUN: llvm-objcopy --remove-section='.b?r' %t.o %t.question.o +# RUN: llvm-readobj --sections %t.question.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,FOO + +## ! (as a leading character) prevents matches (not dependent on ordering). +# RUN: llvm-objcopy --remove-section='.???' 
--remove-section='!.f*' %t.o %t.negmatch.o +# RUN: llvm-readobj --sections %t.negmatch.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,FOO + +## [a-z] matches a range of characters +# RUN: llvm-objcopy --remove-section='.[a-c][a-a][q-z]' %t.o %t.range.o +# RUN: llvm-readobj --sections %t.range.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,FOO + +## [^a-z] or [!a-z] match a negated range of characters. +# RUN: llvm-objcopy --remove-section='.[^x]oo' %t.o %t.negrange.1.o +# RUN: llvm-readobj --sections %t.negrange.1.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,BAR +# RUN: llvm-objcopy --remove-section='.[!x]oo' %t.o %t.negrange.2.o +# RUN: llvm-readobj --sections %t.negrange.2.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,BAR + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: .foo + Type: SHT_PROGBITS + - Name: .bar + Type: SHT_PROGBITS + +## Use a separate test file with special characters for the following tests. +# RUN: yaml2obj --docnum=2 %s > %t.special.o + +## \ escapes wildcard characters. +# RUN: llvm-objcopy --remove-section='\*' %t.special.o %t.escape.1.o +# RUN: llvm-readobj --sections %t.escape.1.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,DOT,QUESTION,LB,RB,FOO +# RUN: llvm-objcopy --remove-section='\?' %t.special.o %t.escape.2.o +# RUN: llvm-readobj --sections %t.escape.2.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,DOT,GLOB,LB,RB,FOO + +## Special characters are not treated like regular expression characters. +# RUN: llvm-objcopy --remove-section='.' %t.special.o %t.dot.o +# RUN: llvm-readobj --sections %t.dot.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,GLOB,QUESTION,LB,RB,FOO + +## Special characters in character classes are treated literally. 
+## [*] should not get expanded to [.*], which would match both '.' and '*' +# RUN: llvm-objcopy --remove-section='[*]' %t.special.o %t.class.1.o +# RUN: llvm-readobj --sections %t.class.1.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,DOT,QUESTION,LB,RB,FOO + +## ] doesn't close the character class as a first character. +# RUN: llvm-objcopy --remove-section='[]xyz]' %t.special.o %t.class.2.o +# RUN: llvm-readobj --sections %t.class.2.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,DOT,GLOB,QUESTION,LB,FOO + +## Named classes like [:lower:] work. +# RUN: llvm-objcopy --remove-section='.f[[:lower:]]o' %t.special.o %t.class.3.o +# RUN: llvm-readobj --sections %t.class.3.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,DOT,GLOB,QUESTION,LB,RB + +## Characters in brackets don't get escaped until the brackets are balanced. +## This matches either [:lower:] or a literal '*'. The '*' does not get expanded +## to '.*' which would also match '.'. +# RUN: llvm-objcopy --remove-section='[[:lower:]*]' %t.special.o %t.class.4.o +# RUN: llvm-readobj --sections %t.class.4.o \ +# RUN: | FileCheck %s --implicit-check-not=Name: --check-prefixes=CHECK,DOT,QUESTION,LB,RB,FOO + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Sections: + - Name: . + Type: SHT_PROGBITS + - Name: '*' + Type: SHT_PROGBITS + - Name: '?' + Type: SHT_PROGBITS + - Name: '[' + Type: SHT_PROGBITS + - Name: ']' + Type: SHT_PROGBITS + - Name: .foo + Type: SHT_PROGBITS + +# CHECK: LoadName: +# CHECK: Name: (0) +# DOT: Name: . +# GLOB: Name: * +# QUESTION: Name: ? 
+# LB: Name: [
+# RB: Name: ]
+# FOO: Name: .foo
+# BAR: Name: .bar
+# CHECK: Name: .symtab
+# CHECK: Name: .strtab
+# CHECK: Name: .shstrtab
diff --git a/llvm/tools/llvm-objcopy/CommonOpts.td b/llvm/tools/llvm-objcopy/CommonOpts.td
--- a/llvm/tools/llvm-objcopy/CommonOpts.td
+++ b/llvm/tools/llvm-objcopy/CommonOpts.td
@@ -111,3 +111,13 @@
 def V : Flag<["-"], "V">,
         Alias<version>,
         HelpText<"Alias for --version">;
+
+def wildcard
+    : Flag<["--"], "wildcard">,
+      HelpText<"Allow wildcard syntax for symbol-related flags. Incompatible "
+               "with --regex. Allows using '*' to match any number of "
+               "characters, '?' to match any single character, '\\' to escape "
+               "special characters, and '[]' to define character classes. "
+               "Wildcards beginning with '!' will prevent a match, for example "
+               "\"-N '*' -N '!x'\" will strip all symbols except for \"x\".">;
+def w : Flag<["-"], "w">, Alias<wildcard>, HelpText<"Alias for --wildcard">;
diff --git a/llvm/tools/llvm-objcopy/CopyConfig.h b/llvm/tools/llvm-objcopy/CopyConfig.h
--- a/llvm/tools/llvm-objcopy/CopyConfig.h
+++ b/llvm/tools/llvm-objcopy/CopyConfig.h
@@ -87,13 +87,22 @@
   Locals, // --discard-locals (-X)
 };
 
+enum class MatchStyle {
+  Literal,  // Default for symbols.
+  Wildcard, // Default for sections, or enabled with --wildcard (-w).
+  Regex,    // Enabled with --regex.
+};
+
 class NameOrRegex {
   StringRef Name;
   // Regex is shared between multiple CopyConfig instances.
   std::shared_ptr<Regex> R;
+  // False for a wildcard pattern that was written with a leading '!'.
+  bool IsPositiveMatch = true;
 
 public:
-  NameOrRegex(StringRef Pattern, bool IsRegex);
+  NameOrRegex(StringRef Pattern, MatchStyle MS);
+  bool isPositiveMatch() const { return IsPositiveMatch; }
   bool operator==(StringRef S) const { return R ? R->match(S) : Name == S; }
   bool operator!=(StringRef S) const { return !operator==(S); }
 };
@@ -101,14 +110,22 @@
 // Matcher that checks symbol or section names against the command line flags
 // provided for that option.
 class NameMatcher {
-  std::vector<NameOrRegex> Matchers;
+  std::vector<NameOrRegex> PosMatchers;
+  std::vector<NameOrRegex> NegMatchers;
 
 public:
   void addMatcher(NameOrRegex Matcher) {
-    Matchers.push_back(std::move(Matcher));
+    if (Matcher.isPositiveMatch())
+      PosMatchers.push_back(std::move(Matcher));
+    else
+      NegMatchers.push_back(std::move(Matcher));
+  }
+  // A name matches when at least one positive pattern accepts it and no
+  // negative pattern does.
+  bool matches(StringRef S) const {
+    return is_contained(PosMatchers, S) && !is_contained(NegMatchers, S);
   }
-  bool matches(StringRef S) const { return is_contained(Matchers, S); }
-  bool empty() const { return Matchers.empty(); }
+  bool empty() const { return PosMatchers.empty() && NegMatchers.empty(); }
 };
 
 struct NewSymbolInfo {
diff --git a/llvm/tools/llvm-objcopy/CopyConfig.cpp b/llvm/tools/llvm-objcopy/CopyConfig.cpp
--- a/llvm/tools/llvm-objcopy/CopyConfig.cpp
+++ b/llvm/tools/llvm-objcopy/CopyConfig.cpp
@@ -343,7 +343,7 @@
 }
 
 static Error addSymbolsFromFile(NameMatcher &Symbols, BumpPtrAllocator &Alloc,
-                                StringRef Filename, bool UseRegex) {
+                                StringRef Filename, MatchStyle MS) {
   StringSaver Saver(Alloc);
   SmallVector<StringRef, 16> Lines;
   auto BufOrErr = MemoryBuffer::getFile(Filename);
@@ -356,21 +356,33 @@
     // it's not empty.
     auto TrimmedLine = Line.split('#').first.trim();
     if (!TrimmedLine.empty())
-      Symbols.addMatcher({Saver.save(TrimmedLine), UseRegex});
+      Symbols.addMatcher({Saver.save(TrimmedLine), MS});
   }
 
   return Error::success();
 }
 
-NameOrRegex::NameOrRegex(StringRef Pattern, bool IsRegex) {
-  if (!IsRegex) {
+NameOrRegex::NameOrRegex(StringRef Pattern, MatchStyle MS) {
+  switch (MS) {
+  case MatchStyle::Literal:
     Name = Pattern;
-    return;
+    break;
+  case MatchStyle::Wildcard: {
+    // A leading '!' marks a negative pattern. consume_front() strips it and,
+    // unlike indexing Pattern[0], is well-defined when Pattern is empty
+    // (e.g. an option that was given an empty value).
+    if (Pattern.consume_front("!"))
+      IsPositiveMatch = false;
+    R = std::make_shared<Regex>("^" + Regex::fromShellWildcard(Pattern) + "$");
+    break;
+  }
+  case MatchStyle::Regex: {
+    SmallVector<char, 32> Data;
+    R = std::make_shared<Regex>(
+        ("^" + Pattern.ltrim('^').rtrim('$') + "$").toStringRef(Data));
+    break;
+  }
   }
-
-  SmallVector<char, 32> Data;
-  R = std::make_shared<Regex>(
-      ("^" + Pattern.ltrim('^').rtrim('$') + "$").toStringRef(Data));
 }
 
 static Error addSymbolsToRenameFromFile(StringMap<StringRef> &SymbolsToRename,
@@ -459,7 +471,18 @@
         errc::invalid_argument,
         "--target cannot be used with --input-target or --output-target");
 
-  bool UseRegex = InputArgs.hasArg(OBJCOPY_regex);
+  if (InputArgs.hasArg(OBJCOPY_regex) && InputArgs.hasArg(OBJCOPY_wildcard))
+    return createStringError(errc::invalid_argument,
+                             "--regex and --wildcard are incompatible");
+
+  MatchStyle SectionMatchStyle = InputArgs.hasArg(OBJCOPY_regex)
+                                     ? MatchStyle::Regex
+                                     : MatchStyle::Wildcard;
+  MatchStyle SymbolMatchStyle = InputArgs.hasArg(OBJCOPY_regex)
+                                    ? MatchStyle::Regex
+                                    : InputArgs.hasArg(OBJCOPY_wildcard)
+                                          ? 
MatchStyle::Wildcard + : MatchStyle::Literal; StringRef InputFormat, OutputFormat; if (InputArgs.hasArg(OBJCOPY_target)) { InputFormat = InputArgs.getLastArgValue(OBJCOPY_target); @@ -612,11 +635,11 @@ } for (auto Arg : InputArgs.filtered(OBJCOPY_remove_section)) - Config.ToRemove.addMatcher({Arg->getValue(), UseRegex}); + Config.ToRemove.addMatcher({Arg->getValue(), SectionMatchStyle}); for (auto Arg : InputArgs.filtered(OBJCOPY_keep_section)) - Config.KeepSection.addMatcher({Arg->getValue(), UseRegex}); + Config.KeepSection.addMatcher({Arg->getValue(), SectionMatchStyle}); for (auto Arg : InputArgs.filtered(OBJCOPY_only_section)) - Config.OnlySection.addMatcher({Arg->getValue(), UseRegex}); + Config.OnlySection.addMatcher({Arg->getValue(), SectionMatchStyle}); for (auto Arg : InputArgs.filtered(OBJCOPY_add_section)) { StringRef ArgValue(Arg->getValue()); if (!ArgValue.contains('=')) @@ -654,46 +677,47 @@ if (Config.DiscardMode == DiscardType::All) Config.StripDebug = true; for (auto Arg : InputArgs.filtered(OBJCOPY_localize_symbol)) - Config.SymbolsToLocalize.addMatcher({Arg->getValue(), UseRegex}); + Config.SymbolsToLocalize.addMatcher({Arg->getValue(), SymbolMatchStyle}); for (auto Arg : InputArgs.filtered(OBJCOPY_localize_symbols)) if (Error E = addSymbolsFromFile(Config.SymbolsToLocalize, DC.Alloc, - Arg->getValue(), UseRegex)) + Arg->getValue(), SymbolMatchStyle)) return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_keep_global_symbol)) - Config.SymbolsToKeepGlobal.addMatcher({Arg->getValue(), UseRegex}); + Config.SymbolsToKeepGlobal.addMatcher({Arg->getValue(), SymbolMatchStyle}); for (auto Arg : InputArgs.filtered(OBJCOPY_keep_global_symbols)) if (Error E = addSymbolsFromFile(Config.SymbolsToKeepGlobal, DC.Alloc, - Arg->getValue(), UseRegex)) + Arg->getValue(), SymbolMatchStyle)) return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_globalize_symbol)) - Config.SymbolsToGlobalize.addMatcher({Arg->getValue(), UseRegex}); + 
Config.SymbolsToGlobalize.addMatcher({Arg->getValue(), SymbolMatchStyle}); for (auto Arg : InputArgs.filtered(OBJCOPY_globalize_symbols)) if (Error E = addSymbolsFromFile(Config.SymbolsToGlobalize, DC.Alloc, - Arg->getValue(), UseRegex)) + Arg->getValue(), SymbolMatchStyle)) return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_weaken_symbol)) - Config.SymbolsToWeaken.addMatcher({Arg->getValue(), UseRegex}); + Config.SymbolsToWeaken.addMatcher({Arg->getValue(), SymbolMatchStyle}); for (auto Arg : InputArgs.filtered(OBJCOPY_weaken_symbols)) if (Error E = addSymbolsFromFile(Config.SymbolsToWeaken, DC.Alloc, - Arg->getValue(), UseRegex)) + Arg->getValue(), SymbolMatchStyle)) return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_strip_symbol)) - Config.SymbolsToRemove.addMatcher({Arg->getValue(), UseRegex}); + Config.SymbolsToRemove.addMatcher({Arg->getValue(), SymbolMatchStyle}); for (auto Arg : InputArgs.filtered(OBJCOPY_strip_symbols)) if (Error E = addSymbolsFromFile(Config.SymbolsToRemove, DC.Alloc, - Arg->getValue(), UseRegex)) + Arg->getValue(), SymbolMatchStyle)) return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_strip_unneeded_symbol)) - Config.UnneededSymbolsToRemove.addMatcher({Arg->getValue(), UseRegex}); + Config.UnneededSymbolsToRemove.addMatcher( + {Arg->getValue(), SymbolMatchStyle}); for (auto Arg : InputArgs.filtered(OBJCOPY_strip_unneeded_symbols)) if (Error E = addSymbolsFromFile(Config.UnneededSymbolsToRemove, DC.Alloc, - Arg->getValue(), UseRegex)) + Arg->getValue(), SymbolMatchStyle)) return std::move(E); for (auto Arg : InputArgs.filtered(OBJCOPY_keep_symbol)) - Config.SymbolsToKeep.addMatcher({Arg->getValue(), UseRegex}); + Config.SymbolsToKeep.addMatcher({Arg->getValue(), SymbolMatchStyle}); for (auto Arg : InputArgs.filtered(OBJCOPY_keep_symbols)) if (Error E = addSymbolsFromFile(Config.SymbolsToKeep, DC.Alloc, - Arg->getValue(), UseRegex)) + Arg->getValue(), SymbolMatchStyle)) return std::move(E); for (auto 
Arg : InputArgs.filtered(OBJCOPY_add_symbol)) { Expected NSI = parseNewSymbolInfo(Arg->getValue()); @@ -801,7 +825,17 @@ "multiple input files cannot be used in combination with -o"); CopyConfig Config; - bool UseRegexp = InputArgs.hasArg(STRIP_regex); + + if (InputArgs.hasArg(STRIP_regex) && InputArgs.hasArg(STRIP_wildcard)) + return createStringError(errc::invalid_argument, + "--regex and --wildcard are incompatible"); + MatchStyle SectionMatchStyle = + InputArgs.hasArg(STRIP_regex) ? MatchStyle::Regex : MatchStyle::Wildcard; + MatchStyle SymbolMatchStyle = InputArgs.hasArg(STRIP_regex) + ? MatchStyle::Regex + : InputArgs.hasArg(STRIP_wildcard) + ? MatchStyle::Wildcard + : MatchStyle::Literal; Config.AllowBrokenLinks = InputArgs.hasArg(STRIP_allow_broken_links); Config.StripDebug = InputArgs.hasArg(STRIP_strip_debug); @@ -819,16 +853,16 @@ Config.KeepFileSymbols = InputArgs.hasArg(STRIP_keep_file_symbols); for (auto Arg : InputArgs.filtered(STRIP_keep_section)) - Config.KeepSection.addMatcher({Arg->getValue(), UseRegexp}); + Config.KeepSection.addMatcher({Arg->getValue(), SectionMatchStyle}); for (auto Arg : InputArgs.filtered(STRIP_remove_section)) - Config.ToRemove.addMatcher({Arg->getValue(), UseRegexp}); + Config.ToRemove.addMatcher({Arg->getValue(), SectionMatchStyle}); for (auto Arg : InputArgs.filtered(STRIP_strip_symbol)) - Config.SymbolsToRemove.addMatcher({Arg->getValue(), UseRegexp}); + Config.SymbolsToRemove.addMatcher({Arg->getValue(), SymbolMatchStyle}); for (auto Arg : InputArgs.filtered(STRIP_keep_symbol)) - Config.SymbolsToKeep.addMatcher({Arg->getValue(), UseRegexp}); + Config.SymbolsToKeep.addMatcher({Arg->getValue(), SymbolMatchStyle}); if (!InputArgs.hasArg(STRIP_no_strip_all) && !Config.StripDebug && !Config.StripUnneeded && Config.DiscardMode == DiscardType::None && diff --git a/llvm/unittests/Support/RegexTest.cpp b/llvm/unittests/Support/RegexTest.cpp --- a/llvm/unittests/Support/RegexTest.cpp +++ b/llvm/unittests/Support/RegexTest.cpp @@ 
-131,6 +131,55 @@
   EXPECT_EQ("abc\\{1\\\\,2\\}", Regex::escape("abc{1\\,2}"));
 }
 
+TEST_F(RegexTest, ShellWildcard) {
+  // '*' matches any sequence of characters and is converted to '.*'.
+  EXPECT_EQ("a.*b", Regex::fromShellWildcard("a*b"));
+
+  // '?' matches a single character and is converted to '.'.
+  EXPECT_EQ("a.b", Regex::fromShellWildcard("a?b"));
+
+  // Character classes pass through; a leading '!' negation becomes '^'.
+  EXPECT_EQ("a[x-y]b", Regex::fromShellWildcard("a[x-y]b"));
+  EXPECT_EQ("a[^x-y]b", Regex::fromShellWildcard("a[^x-y]b"));
+  EXPECT_EQ("a[^x-y]b", Regex::fromShellWildcard("a[!x-y]b"));
+  EXPECT_EQ("[x-y]", Regex::fromShellWildcard("[x-y]"));
+  EXPECT_EQ("[^x-y]", Regex::fromShellWildcard("[^x-y]"));
+  EXPECT_EQ("[^x-y]", Regex::fromShellWildcard("[!x-y]"));
+  EXPECT_EQ("[[:alpha:]]", Regex::fromShellWildcard("[[:alpha:]]"));
+
+  // Characters inside a character class, including a leading ']', stay as-is.
+  EXPECT_EQ("a[*]b", Regex::fromShellWildcard("a[*]b"));
+  EXPECT_EQ("a[x-y*]b", Regex::fromShellWildcard("a[x-y*]b"));
+  EXPECT_EQ("a[^x-y?]b", Regex::fromShellWildcard("a[^x-y?]b"));
+  EXPECT_EQ("a[{}]b", Regex::fromShellWildcard("a[{}]b"));
+  EXPECT_EQ("a[]]b", Regex::fromShellWildcard("a[]]b"));
+  EXPECT_EQ("a[]*]b", Regex::fromShellWildcard("a[]*]b"));
+  EXPECT_EQ("a[^]*]b", Regex::fromShellWildcard("a[^]*]b"));
+
+  // A backslash escape is passed through together with the next character.
+  EXPECT_EQ("a\\b", Regex::fromShellWildcard("a\\b"));
+  EXPECT_EQ("a\\*\\?", Regex::fromShellWildcard("a\\*\\?"));
+  EXPECT_EQ("a\\*.*", Regex::fromShellWildcard("a\\**"));
+
+  // A stray ']' outside a class and a '[' inside a class are both literal.
+  EXPECT_EQ("].*", Regex::fromShellWildcard("]*"));
+  EXPECT_EQ("]\\$", Regex::fromShellWildcard("]$"));
+  EXPECT_EQ("[[].*", Regex::fromShellWildcard("[[]*"));
+
+  // Collating elements ([: :], [. .], [= =]) inside a class are kept intact.
+  EXPECT_EQ("[[:alpha:]*].*", Regex::fromShellWildcard("[[:alpha:]*]*"));
+  EXPECT_EQ("[[.foo.]*].*", Regex::fromShellWildcard("[[.foo.]*]*"));
+  EXPECT_EQ("[[=foo=]*].*", Regex::fromShellWildcard("[[=foo=]*]*"));
+
+  // Unterminated collating elements are copied as-is and must not crash.
+  EXPECT_EQ("[[:foo", Regex::fromShellWildcard("[[:foo"));
+  EXPECT_EQ("[[:foo]", Regex::fromShellWildcard("[[:foo]"));
+
+  // Regex metacharacters that are not wildcard syntax get escaped.
+  EXPECT_EQ("\\^x1\\(\\)\\|\\+\\.\\{\\}\\$",
+            Regex::fromShellWildcard("^x1()|+.{}$"));
+}
+
 TEST_F(RegexTest, IsValid) {
   std::string Error;
   EXPECT_FALSE(Regex("(foo").isValid(Error));